From 978bd0278f90b5e71a5587ffa9804b43f4996f8c Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 13 Jan 2020 13:17:39 -0300 Subject: dt-bindings: vendor-prefixes: Add Shenzhen Frida LCD Co., Ltd. Add an entry for Shenzhen Frida LCD Co., Ltd. v2: No change v3: No change Signed-off-by: Paul Cercueil Acked-by: Sam Ravnborg Acked-by: Rob Herring Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200113161741.32061-1-paul@crapouillou.net --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 835579edc971..76069bb51ea4 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -337,6 +337,8 @@ patternProperties: description: Firefly "^focaltech,.*": description: FocalTech Systems Co.,Ltd + "^frida,.*": + description: Shenzhen Frida LCD Co., Ltd. "^friendlyarm,.*": description: Guangzhou FriendlyARM Computer Tech Co., Ltd "^fsl,.*": -- cgit v1.2.3 From 7ab618c9a7e6d3f941b2d71441af218823a81fc2 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Mon, 13 Jan 2020 13:17:40 -0300 Subject: dt-bindings: panel-simple: Add compatible for Frida FRD350H54004 LCD Add bindings documentation for the Frida 3.5" (320x240 pixels) 24-bit TFT LCD panel. v2: Switch documentation from plain text to YAML v3: Simply add new compatible to panel-simple.yaml file instead of adding new file Signed-off-by: Paul Cercueil Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200113161741.32061-2-paul@crapouillou.net --- Documentation/devicetree/bindings/display/panel/panel-simple.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index 8fe60ee2531c..4a8064e31793 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -37,6 +37,8 @@ properties: - auo,b116xa01 # BOE NV140FHM-N49 14.0" FHD a-Si FT panel - boe,nv140fhmn49 + # Frida FRD350H54004 3.5" QVGA TFT LCD panel + - frida,frd350h54004 # GiantPlus GPM940B0 3.0" QVGA TFT LCD panel - giantplus,gpm940b0 # Satoz SAT050AT40H12R2 5.0" WVGA TFT LCD panel -- cgit v1.2.3 From af6cb95cf641053a3c0a46d5deb736f7e85c44d9 Mon Sep 17 00:00:00 2001 From: Jitao Shi Date: Thu, 16 Jan 2020 10:15:07 +0800 Subject: dt-bindings: display: panel: Add boe tv101wum-n16 panel bindings Add documentation for "boe,tv101wum-n16", "auo,kd101n80-45na", "boe,tv101wum-n53" and "auo,b101uan08.3" panels. Signed-off-by: Jitao Shi Reviewed-by: Sam Ravnborg [dropped Rob from list of maintainers - verified on irc] Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200116021511.22675-2-jitao.shi@mediatek.com --- .../bindings/display/panel/boe,tv101wum-nl6.yaml | 80 ++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml b/Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml new file mode 100644 index 000000000000..740213459134 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/boe,tv101wum-nl6.yaml @@ -0,0 +1,80 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/boe,tv101wum-nl6.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: BOE TV101WUM-NL6 DSI Display Panel + +maintainers: + - Thierry Reding + - Sam Ravnborg + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + enum: + # BOE TV101WUM-NL6 10.1" WUXGA TFT LCD panel + - boe,tv101wum-nl6 + # AUO KD101N80-45NA 10.1" WUXGA TFT LCD panel + - auo,kd101n80-45na + # BOE TV101WUM-N53 10.1" WUXGA TFT LCD panel + - boe,tv101wum-n53 + # AUO B101UAN08.3 10.1" WUXGA TFT LCD panel + - auo,b101uan08.3 + + reg: + description: the virtual channel number of a DSI peripheral + + enable-gpios: + description: a GPIO spec for the enable pin + + pp1800-supply: + description: core voltage supply + + avdd-supply: + description: phandle of the regulator that provides positive voltage + + avee-supply: + description: phandle of the regulator that provides negative voltage + + backlight: + description: phandle of the backlight device attached to the panel + + port: true + +required: + - compatible + - reg + - enable-gpios + - pp1800-supply + - avdd-supply + - avee-supply + +additionalProperties: false + +examples: + - | + dsi { + #address-cells = <1>; + #size-cells = <0>; + panel@0 { + compatible = "boe,tv101wum-nl6"; + reg = <0>; + enable-gpios = <&pio 45 0>; + avdd-supply = <&ppvarn_lcd>; + avee-supply = <&ppvarp_lcd>; + pp1800-supply = <&pp1800_lcd>; + backlight = <&backlight_lcd0>; + status = "okay"; + port { + panel_in: endpoint { + remote-endpoint = <&dsi_out>; + }; + }; + }; + }; + +... -- cgit v1.2.3 From a32de0d2de90a0d1a9476522834a6723b3df0340 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 16 Jan 2020 11:36:32 +0800 Subject: dt-bindings: vendor-prefix: add Shenzhen Feixin Photoelectics Co., Ltd Shenzhen Feixin Photoelectics Co., Ltd is a company to provide LCD modules. Add its vendor prefix. Signed-off-by: Icenowy Zheng Acked-by: Sam Ravnborg Acked-by: Rob Herring Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200116033636.512461-2-icenowy@aosc.io --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 76069bb51ea4..f9b84f24a382 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -331,6 +331,8 @@ patternProperties: description: Fastrax Oy "^fcs,.*": description: Fairchild Semiconductor + "^feixin,.*": + description: Shenzhen Feixin Photoelectic Co., Ltd "^feiyang,.*": description: Shenzhen Fly Young Technology Co.,LTD. "^firefly,.*": -- cgit v1.2.3 From 44a90cc350a32a0519384b9c96a36ef9bd9f37f3 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 16 Jan 2020 11:36:33 +0800 Subject: dt-bindings: panel: add Feixin K101 IM2BA02 MIPI-DSI panel Feixin K101 IM2BA02 is a 10.1" 800x1280 4-lane MIPI-DSI panel. Add device tree binding for it. Signed-off-by: Icenowy Zheng Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200116033636.512461-3-icenowy@aosc.io --- .../display/panel/feixin,k101-im2ba02.yaml | 55 ++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/panel/feixin,k101-im2ba02.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/feixin,k101-im2ba02.yaml b/Documentation/devicetree/bindings/display/panel/feixin,k101-im2ba02.yaml new file mode 100644 index 000000000000..927f1eea18d2 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/feixin,k101-im2ba02.yaml @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/feixin,k101-im2ba02.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Feixin K101 IM2BA02 10.1" MIPI-DSI LCD panel + +maintainers: + - Icenowy Zheng + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: feixin,k101-im2ba02 + reg: true + backlight: true + reset-gpios: true + avdd-supply: + description: regulator that supplies the AVDD voltage + dvdd-supply: + description: regulator that supplies the DVDD voltage + cvdd-supply: + description: regulator that supplies the CVDD voltage + +required: + - compatible + - reg + - avdd-supply + - dvdd-supply + - cvdd-supply + +additionalProperties: false + +examples: + - | + #include + + dsi { + #address-cells = <1>; + #size-cells = <0>; + panel@0 { + compatible = "feixin,k101-im2ba02"; + reg = <0>; + avdd-supply = <®_dc1sw>; + dvdd-supply = <®_dc1sw>; + cvdd-supply = <®_ldo_io1>; + reset-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; + backlight = <&backlight>; + }; + }; + +... -- cgit v1.2.3 From abdd9e3705c81bccbded82130e5963267f44e3d9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Jan 2020 13:45:44 +0100 Subject: dt-bindings: display: sitronix,st7735r: Convert to DT schema Convert the DT binding documentation for Sitronix ST7735R displays to DT schema. Add a reference to the Adafruit 1.8" LCD while at it. Signed-off-by: Geert Uytterhoeven Reviewed-by: Sam Ravnborg Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200115124548.3951-2-geert+renesas@glider.be --- .../bindings/display/sitronix,st7735r.txt | 35 ------------ .../bindings/display/sitronix,st7735r.yaml | 65 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 35 deletions(-) delete mode 100644 Documentation/devicetree/bindings/display/sitronix,st7735r.txt create mode 100644 Documentation/devicetree/bindings/display/sitronix,st7735r.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.txt b/Documentation/devicetree/bindings/display/sitronix,st7735r.txt deleted file mode 100644 index cd5c7186890a..000000000000 --- a/Documentation/devicetree/bindings/display/sitronix,st7735r.txt +++ /dev/null @@ -1,35 +0,0 @@ -Sitronix ST7735R display panels - -This binding is for display panels using a Sitronix ST7735R controller in SPI -mode. - -Required properties: -- compatible: "jianda,jd-t18003-t01", "sitronix,st7735r" -- dc-gpios: Display data/command selection (D/CX) -- reset-gpios: Reset signal (RSTX) - -The node for this driver must be a child node of a SPI controller, hence -all mandatory properties described in ../spi/spi-bus.txt must be specified. - -Optional properties: -- rotation: panel rotation in degrees counter clockwise (0,90,180,270) -- backlight: phandle of the backlight device attached to the panel - -Example: - - backlight: backlight { - compatible = "gpio-backlight"; - gpios = <&gpio 44 GPIO_ACTIVE_HIGH>; - }; - - ... - - display@0{ - compatible = "jianda,jd-t18003-t01", "sitronix,st7735r"; - reg = <0>; - spi-max-frequency = <32000000>; - dc-gpios = <&gpio 43 GPIO_ACTIVE_HIGH>; - reset-gpios = <&gpio 80 GPIO_ACTIVE_HIGH>; - rotation = <270>; - backlight = &backlight; - }; diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml new file mode 100644 index 000000000000..21bccc91f742 --- /dev/null +++ b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/sitronix,st7735r.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sitronix ST7735R Display Panels Device Tree Bindings + +maintainers: + - David Lechner + +description: + This binding is for display panels using a Sitronix ST7735R controller in + SPI mode. + +allOf: + - $ref: panel/panel-common.yaml# + +properties: + compatible: + oneOf: + - description: + Adafruit 1.8" 160x128 Color TFT LCD (Product ID 358 or 618) + items: + - enum: + - jianda,jd-t18003-t01 + - const: sitronix,st7735r + + spi-max-frequency: + maximum: 32000000 + + dc-gpios: + maxItems: 1 + description: Display data/command selection (D/CX) + +required: + - compatible + - reg + - dc-gpios + - reset-gpios + +examples: + - | + #include + + backlight: backlight { + compatible = "gpio-backlight"; + gpios = <&gpio 44 GPIO_ACTIVE_HIGH>; + }; + + spi { + #address-cells = <1>; + #size-cells = <0>; + + display@0{ + compatible = "jianda,jd-t18003-t01", "sitronix,st7735r"; + reg = <0>; + spi-max-frequency = <32000000>; + dc-gpios = <&gpio 43 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio 80 GPIO_ACTIVE_HIGH>; + rotation = <270>; + }; + }; + +... -- cgit v1.2.3 From 3a1a6be40bd7ef6c98dcdfa7937ff3279206e92e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 15 Jan 2020 13:45:45 +0100 Subject: dt-bindings: display: sitronix,st7735r: Add Okaya RH128128T Document support for the Okaya RH128128T display, which is a 128x128 1.44" TFT display driven by a Sitronix ST7715R TFT Controller/Driver. It can be found on e.g. the Renesas YRSK-LCD-PMOD extension board, which comes with various Renesas development kits (e.g. Renesas Starter Kit+ for RZ/A1H[1]). ST7715R and ST7735R are very similar. Their major difference is that the former is restricted to displays of up to 132x132 pixels, while the latter supports displays up to 132x162 pixels. [1] https://renesasrulz.com/the_vault/f/archive-forum/4981/upgrading-to-the-renesas-rz-a1h Signed-off-by: Geert Uytterhoeven Reviewed-by: Sam Ravnborg Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200115124548.3951-3-geert+renesas@glider.be --- .../devicetree/bindings/display/sitronix,st7735r.yaml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml index 21bccc91f742..8892d79e6e10 100644 --- a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml +++ b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml @@ -10,8 +10,8 @@ maintainers: - David Lechner description: - This binding is for display panels using a Sitronix ST7735R controller in - SPI mode. + This binding is for display panels using a Sitronix ST7715R or ST7735R + controller in SPI mode. allOf: - $ref: panel/panel-common.yaml# @@ -25,6 +25,12 @@ properties: - enum: - jianda,jd-t18003-t01 - const: sitronix,st7735r + - description: + Okaya 1.44" 128x128 Color TFT LCD (E.g. Renesas YRSK-LCD-PMOD) + items: + - enum: + - okaya,rh128128t + - const: sitronix,st7715r spi-max-frequency: maximum: 32000000 -- cgit v1.2.3 From cafddd6080273578c900ac67817955ff47c6ea73 Mon Sep 17 00:00:00 2001 From: Jitao Shi Date: Mon, 30 Dec 2019 10:04:18 +0100 Subject: Documentation: bridge: Add documentation for ps8640 DT properties Add documentation for DT properties supported by ps8640 DSI-eDP converter. Signed-off-by: Jitao Shi Acked-by: Rob Herring Reviewed-by: Philipp Zabel Signed-off-by: Ulrich Hecht Signed-off-by: Enric Balletbo i Serra Reviewed-by: Rob Herring Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20191230090419.137141-2-enric.balletbo@collabora.com --- .../devicetree/bindings/display/bridge/ps8640.yaml | 112 +++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/bridge/ps8640.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml new file mode 100644 index 000000000000..5dff93641bea --- /dev/null +++ b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/bridge/ps8640.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MIPI DSI to eDP Video Format Converter Device Tree Bindings + +maintainers: + - Nicolas Boichat + - Enric Balletbo i Serra + +description: | + The PS8640 is a low power MIPI-to-eDP video format converter supporting + mobile devices with embedded panel resolutions up to 2048 x 1536. The + device accepts a single channel of MIPI DSI v1.1, with up to four lanes + plus clock, at a transmission rate up to 1.5Gbit/sec per lane. The + device outputs eDP v1.4, one or two lanes, at a link rate of up to + 3.24Gbit/sec per lane. + +properties: + compatible: + const: parade,ps8640 + + reg: + maxItems: 1 + description: Base I2C address of the device. + + powerdown-gpios: + maxItems: 1 + description: GPIO connected to active low powerdown. + + reset-gpios: + maxItems: 1 + description: GPIO connected to active low reset. + + vdd12-supply: + maxItems: 1 + description: Regulator for 1.2V digital core power. + + vdd33-supply: + maxItems: 1 + description: Regulator for 3.3V digital core power. + + ports: + type: object + description: + A node containing DSI input & output port nodes with endpoint + definitions as documented in + Documentation/devicetree/bindings/media/video-interfaces.txt + Documentation/devicetree/bindings/graph.txt + properties: + port@0: + type: object + description: | + Video port for DSI input + + port@1: + type: object + description: | + Video port for eDP output (panel or connector). + + required: + - port@0 + +required: + - compatible + - reg + - powerdown-gpios + - reset-gpios + - vdd12-supply + - vdd33-supply + - ports + +additionalProperties: false + +examples: + - | + #include + i2c0 { + #address-cells = <1>; + #size-cells = <0>; + + ps8640: edp-bridge@18 { + compatible = "parade,ps8640"; + reg = <0x18>; + powerdown-gpios = <&pio 116 GPIO_ACTIVE_LOW>; + reset-gpios = <&pio 115 GPIO_ACTIVE_LOW>; + vdd12-supply = <&ps8640_fixed_1v2>; + vdd33-supply = <&mt6397_vgp2_reg>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + ps8640_in: endpoint { + remote-endpoint = <&dsi0_out>; + }; + }; + + port@1 { + reg = <1>; + ps8640_out: endpoint { + remote-endpoint = <&panel_in>; + }; + }; + }; + }; + }; + -- cgit v1.2.3 From f5df7369d5438917a679562d57d19b31ea70ada2 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Sun, 19 Jan 2020 15:09:07 -0600 Subject: dt-bindings: display: Convert a bunch of panels to DT schema Convert all the 'simple' panels which match the constraints of the common panel-simple.yaml schema. This conversion is based on how the panels are documented. Some may turn out to be more complex once the schema is applied to actual dts files. Cc: Maxime Ripard Cc: Chen-Yu Tsai Cc: Thierry Reding Reviewed-by: Sam Ravnborg Signed-off-by: Rob Herring Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200119210907.24152-1-robh@kernel.org --- .../bindings/display/panel/auo,b080uan01.txt | 7 - .../bindings/display/panel/auo,b101aw03.txt | 7 - .../bindings/display/panel/auo,b101ean01.txt | 7 - .../bindings/display/panel/auo,b101xtn01.txt | 7 - .../bindings/display/panel/auo,b116xw03.txt | 7 - .../bindings/display/panel/auo,b133htn01.txt | 7 - .../bindings/display/panel/auo,b133xtn01.txt | 7 - .../bindings/display/panel/auo,g070vvn01.txt | 29 --- .../bindings/display/panel/auo,g101evn010.txt | 12 -- .../bindings/display/panel/auo,g104sn02.txt | 12 -- .../bindings/display/panel/auo,g133han01.txt | 7 - .../bindings/display/panel/auo,g185han01.txt | 7 - .../bindings/display/panel/auo,p320hvn03.txt | 8 - .../bindings/display/panel/auo,t215hvn01.txt | 7 - .../bindings/display/panel/avic,tm070ddh03.txt | 7 - .../bindings/display/panel/boe,hv070wsa-100.txt | 28 --- .../bindings/display/panel/boe,nv101wxmn51.txt | 7 - .../bindings/display/panel/boe,tv080wum-nl0.txt | 7 - .../display/panel/cdtech,s043wq26h-ct7.txt | 12 -- .../display/panel/cdtech,s070wv95-ct16.txt | 12 -- .../display/panel/chunghwa,claa070wp03xg.txt | 7 - .../display/panel/chunghwa,claa101wa01a.txt | 7 - .../display/panel/chunghwa,claa101wb03.txt | 7 - .../display/panel/dataimage,scf0700c48ggu18.txt | 8 - .../bindings/display/panel/dlc,dlc1010gig.txt | 12 -- .../bindings/display/panel/edt,et-series.txt | 55 ------ .../display/panel/evervision,vgg804821.txt | 12 -- .../display/panel/foxlink,fl500wvr00-a0t.txt | 7 - .../bindings/display/panel/friendlyarm,hd702e.txt | 32 ---- .../display/panel/giantplus,gpg482739qs5.txt | 7 - .../bindings/display/panel/hannstar,hsd070pww1.txt | 7 - .../bindings/display/panel/hannstar,hsd100pxn1.txt | 7 - .../bindings/display/panel/hit,tx23d38vm0caa.txt | 7 - .../bindings/display/panel/innolux,at043tn24.txt | 7 - .../bindings/display/panel/innolux,at070tn92.txt | 7 - .../bindings/display/panel/innolux,g070y2-l01.txt | 12 -- .../bindings/display/panel/innolux,g101ice-l01.txt | 7 - .../bindings/display/panel/innolux,g121i1-l01.txt | 7 - .../bindings/display/panel/innolux,g121x1-l03.txt | 7 - .../bindings/display/panel/innolux,n116bge.txt | 7 - .../bindings/display/panel/innolux,n156bge-l21.txt | 7 - .../bindings/display/panel/innolux,zj070na-01p.txt | 7 - .../bindings/display/panel/koe,tx14d24vm1bpa.txt | 42 ----- .../bindings/display/panel/koe,tx31d200vm0baa.txt | 25 --- .../bindings/display/panel/kyo,tcg121xglp.txt | 7 - .../display/panel/lemaker,bl035-rgb-002.txt | 12 -- .../bindings/display/panel/lg,lb070wv8.txt | 7 - .../bindings/display/panel/lg,lp079qx1-sp0v.txt | 7 - .../bindings/display/panel/lg,lp097qx1-spa1.txt | 7 - .../bindings/display/panel/lg,lp120up1.txt | 7 - .../bindings/display/panel/lg,lp129qe.txt | 7 - .../display/panel/mitsubishi,aa070mc01.txt | 7 - .../bindings/display/panel/nec,nl12880b20-05.txt | 8 - .../bindings/display/panel/nec,nl4827hc19-05b.txt | 7 - .../bindings/display/panel/netron-dy,e231732.txt | 7 - .../panel/newhaven,nhd-4.3-480272ef-atxl.txt | 7 - .../display/panel/nlt,nl192108ac18-02d.txt | 8 - .../devicetree/bindings/display/panel/nvd,9128.txt | 7 - .../display/panel/okaya,rs800480t-7x0gp.txt | 7 - .../display/panel/olimex,lcd-olinuxino-43-ts.txt | 7 - .../bindings/display/panel/ontat,yx700wv03.txt | 7 - .../display/panel/ortustech,com37h3m05dtc.txt | 12 -- .../display/panel/ortustech,com37h3m99dtc.txt | 12 -- .../display/panel/ortustech,com43h4m85ulc.txt | 7 - .../display/panel/osddisplays,osd070t1718-19ts.txt | 12 -- .../display/panel/osddisplays,osd101t2045-53ts.txt | 11 -- .../display/panel/panasonic,vvx10f004b00.txt | 7 - .../bindings/display/panel/panel-simple.yaml | 195 +++++++++++++++++++++ .../display/panel/qiaodian,qd43003c0-40.txt | 7 - .../display/panel/rocktech,rk070er9427.txt | 25 --- .../display/panel/samsung,lsn122dl01-c01.txt | 7 - .../bindings/display/panel/samsung,ltn101nt05.txt | 7 - .../display/panel/samsung,ltn140at29-301.txt | 7 - .../bindings/display/panel/sharp,lq035q7db03.txt | 12 -- .../bindings/display/panel/sharp,lq070y3dg3b.txt | 12 -- .../bindings/display/panel/sharp,lq101k1ly04.txt | 7 - .../bindings/display/panel/sharp,lq123p1jx31.txt | 7 - .../display/panel/shelly,sca07010-bfn-lnn.txt | 7 - .../bindings/display/panel/starry,kr122ea0sra.txt | 7 - .../bindings/display/panel/tianma,tm070jdhg30.txt | 7 - .../bindings/display/panel/tianma,tm070rvhg71.txt | 29 --- .../display/panel/toshiba,lt089ac29000.txt | 8 - .../bindings/display/panel/tpk,f07a-0102.txt | 8 - .../bindings/display/panel/tpk,f10a-0102.txt | 8 - .../bindings/display/panel/urt,umsh-8596md.txt | 16 -- .../bindings/display/panel/vl050_8048nt_c01.txt | 12 -- .../bindings/display/panel/winstar,wf35ltiacd.txt | 48 ----- 87 files changed, 195 insertions(+), 942 deletions(-) delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,b080uan01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,b101aw03.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,b101ean01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,b101xtn01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,b116xw03.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,b133htn01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,b133xtn01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,g070vvn01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,g101evn010.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,g104sn02.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,g133han01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,g185han01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,p320hvn03.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/auo,t215hvn01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/avic,tm070ddh03.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/boe,hv070wsa-100.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/boe,nv101wxmn51.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/boe,tv080wum-nl0.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/cdtech,s043wq26h-ct7.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/cdtech,s070wv95-ct16.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/chunghwa,claa070wp03xg.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/chunghwa,claa101wa01a.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/chunghwa,claa101wb03.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/dataimage,scf0700c48ggu18.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/dlc,dlc1010gig.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/edt,et-series.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/evervision,vgg804821.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/foxlink,fl500wvr00-a0t.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/friendlyarm,hd702e.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/giantplus,gpg482739qs5.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/hannstar,hsd070pww1.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/hannstar,hsd100pxn1.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/hit,tx23d38vm0caa.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/innolux,at043tn24.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/innolux,at070tn92.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/innolux,g070y2-l01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/innolux,g101ice-l01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/innolux,g121i1-l01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/innolux,g121x1-l03.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/innolux,n116bge.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/innolux,n156bge-l21.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/innolux,zj070na-01p.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/koe,tx14d24vm1bpa.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/koe,tx31d200vm0baa.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/kyo,tcg121xglp.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/lemaker,bl035-rgb-002.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/lg,lb070wv8.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/lg,lp079qx1-sp0v.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/lg,lp097qx1-spa1.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/lg,lp120up1.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/lg,lp129qe.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/mitsubishi,aa070mc01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/nec,nl12880b20-05.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/nec,nl4827hc19-05b.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/netron-dy,e231732.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/newhaven,nhd-4.3-480272ef-atxl.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/nlt,nl192108ac18-02d.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/nvd,9128.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/okaya,rs800480t-7x0gp.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino-43-ts.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/ontat,yx700wv03.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/ortustech,com37h3m05dtc.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/ortustech,com37h3m99dtc.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/ortustech,com43h4m85ulc.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/osddisplays,osd070t1718-19ts.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2045-53ts.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/panasonic,vvx10f004b00.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/qiaodian,qd43003c0-40.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/rocktech,rk070er9427.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/samsung,lsn122dl01-c01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/samsung,ltn101nt05.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/samsung,ltn140at29-301.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/sharp,lq035q7db03.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/sharp,lq070y3dg3b.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/sharp,lq101k1ly04.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/sharp,lq123p1jx31.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/shelly,sca07010-bfn-lnn.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/starry,kr122ea0sra.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/tianma,tm070jdhg30.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/tianma,tm070rvhg71.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/toshiba,lt089ac29000.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/tpk,f07a-0102.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/tpk,f10a-0102.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/urt,umsh-8596md.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/vl050_8048nt_c01.txt delete mode 100644 Documentation/devicetree/bindings/display/panel/winstar,wf35ltiacd.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/auo,b080uan01.txt b/Documentation/devicetree/bindings/display/panel/auo,b080uan01.txt deleted file mode 100644 index bae0e2b51467..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,b080uan01.txt +++ /dev/null @@ -1,7 +0,0 @@ -AU Optronics Corporation 8.0" WUXGA TFT LCD panel - -Required properties: -- compatible: should be "auo,b101ean01" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/auo,b101aw03.txt b/Documentation/devicetree/bindings/display/panel/auo,b101aw03.txt deleted file mode 100644 index 72e088a4fb3a..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,b101aw03.txt +++ /dev/null @@ -1,7 +0,0 @@ -AU Optronics Corporation 10.1" WSVGA TFT LCD panel - -Required properties: -- compatible: should be "auo,b101aw03" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/auo,b101ean01.txt b/Documentation/devicetree/bindings/display/panel/auo,b101ean01.txt deleted file mode 100644 index 3590b0741619..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,b101ean01.txt +++ /dev/null @@ -1,7 +0,0 @@ -AU Optronics Corporation 10.1" WSVGA TFT LCD panel - -Required properties: -- compatible: should be "auo,b101ean01" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/auo,b101xtn01.txt b/Documentation/devicetree/bindings/display/panel/auo,b101xtn01.txt deleted file mode 100644 index 889d511d66c9..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,b101xtn01.txt +++ /dev/null @@ -1,7 +0,0 @@ -AU Optronics Corporation 10.1" WXGA TFT LCD panel - -Required properties: -- compatible: should be "auo,b101xtn01" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/auo,b116xw03.txt b/Documentation/devicetree/bindings/display/panel/auo,b116xw03.txt deleted file mode 100644 index 690d0a568ef3..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,b116xw03.txt +++ /dev/null @@ -1,7 +0,0 @@ -AU Optronics Corporation 11.6" HD (1366x768) color TFT-LCD panel - -Required properties: -- compatible: should be "auo,b116xw03" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/auo,b133htn01.txt b/Documentation/devicetree/bindings/display/panel/auo,b133htn01.txt deleted file mode 100644 index 302226b5bb55..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,b133htn01.txt +++ /dev/null @@ -1,7 +0,0 @@ -AU Optronics Corporation 13.3" FHD (1920x1080) color TFT-LCD panel - -Required properties: -- compatible: should be "auo,b133htn01" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/auo,b133xtn01.txt b/Documentation/devicetree/bindings/display/panel/auo,b133xtn01.txt deleted file mode 100644 index 7443b7c76769..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,b133xtn01.txt +++ /dev/null @@ -1,7 +0,0 @@ -AU Optronics Corporation 13.3" WXGA (1366x768) TFT LCD panel - -Required properties: -- compatible: should be "auo,b133xtn01" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/auo,g070vvn01.txt b/Documentation/devicetree/bindings/display/panel/auo,g070vvn01.txt deleted file mode 100644 index 49e4105378f6..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,g070vvn01.txt +++ /dev/null @@ -1,29 +0,0 @@ -AU Optronics Corporation 7.0" FHD (800 x 480) TFT LCD panel - -Required properties: -- compatible: should be "auo,g070vvn01" -- backlight: phandle of the backlight device attached to the panel -- power-supply: single regulator to provide the supply voltage - -Required nodes: -- port: Parallel port mapping to connect this display - -This panel needs single power supply voltage. Its backlight is conntrolled -via PWM signal. - -Example: --------- - -Example device-tree definition when connected to iMX6Q based board - - lcd_panel: lcd-panel { - compatible = "auo,g070vvn01"; - backlight = <&backlight_lcd>; - power-supply = <®_display>; - - port { - lcd_panel_in: endpoint { - remote-endpoint = <&lcd_display_out>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/display/panel/auo,g101evn010.txt b/Documentation/devicetree/bindings/display/panel/auo,g101evn010.txt deleted file mode 100644 index bc6a0c858e23..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,g101evn010.txt +++ /dev/null @@ -1,12 +0,0 @@ -AU Optronics Corporation 10.1" (1280x800) color TFT LCD panel - -Required properties: -- compatible: should be "auo,g101evn010" -- power-supply: as specified in the base binding - -Optional properties: -- backlight: as specified in the base binding -- enable-gpios: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/auo,g104sn02.txt b/Documentation/devicetree/bindings/display/panel/auo,g104sn02.txt deleted file mode 100644 index 85626edf63e5..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,g104sn02.txt +++ /dev/null @@ -1,12 +0,0 @@ -AU Optronics Corporation 10.4" (800x600) color TFT LCD panel - -Required properties: -- compatible: should be "auo,g104sn02" -- power-supply: as specified in the base binding - -Optional properties: -- backlight: as specified in the base binding -- enable-gpios: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/auo,g133han01.txt b/Documentation/devicetree/bindings/display/panel/auo,g133han01.txt deleted file mode 100644 index 3afc76747824..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,g133han01.txt +++ /dev/null @@ -1,7 +0,0 @@ -AU Optronics Corporation 13.3" FHD (1920x1080) TFT LCD panel - -Required properties: -- compatible: should be "auo,g133han01" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/auo,g185han01.txt b/Documentation/devicetree/bindings/display/panel/auo,g185han01.txt deleted file mode 100644 index ed657c2141d4..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,g185han01.txt +++ /dev/null @@ -1,7 +0,0 @@ -AU Optronics Corporation 18.5" FHD (1920x1080) TFT LCD panel - -Required properties: -- compatible: should be "auo,g185han01" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/auo,p320hvn03.txt b/Documentation/devicetree/bindings/display/panel/auo,p320hvn03.txt deleted file mode 100644 index 59bb6cd8aa75..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,p320hvn03.txt +++ /dev/null @@ -1,8 +0,0 @@ -AU Optronics Corporation 31.5" FHD (1920x1080) TFT LCD panel - -Required properties: -- compatible: should be "auo,p320hvn03" -- power-supply: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/auo,t215hvn01.txt b/Documentation/devicetree/bindings/display/panel/auo,t215hvn01.txt deleted file mode 100644 index cbd9da3f03b1..000000000000 --- a/Documentation/devicetree/bindings/display/panel/auo,t215hvn01.txt +++ /dev/null @@ -1,7 +0,0 @@ -AU Optronics Corporation 21.5" FHD (1920x1080) color TFT LCD panel - -Required properties: -- compatible: should be "auo,t215hvn01" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/avic,tm070ddh03.txt b/Documentation/devicetree/bindings/display/panel/avic,tm070ddh03.txt deleted file mode 100644 index b6f2f3e8f44e..000000000000 --- a/Documentation/devicetree/bindings/display/panel/avic,tm070ddh03.txt +++ /dev/null @@ -1,7 +0,0 @@ -Shanghai AVIC Optoelectronics 7" 1024x600 color TFT-LCD panel - -Required properties: -- compatible: should be "avic,tm070ddh03" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/boe,hv070wsa-100.txt b/Documentation/devicetree/bindings/display/panel/boe,hv070wsa-100.txt deleted file mode 100644 index 55183d360032..000000000000 --- a/Documentation/devicetree/bindings/display/panel/boe,hv070wsa-100.txt +++ /dev/null @@ -1,28 +0,0 @@ -BOE HV070WSA-100 7.01" WSVGA TFT LCD panel - -Required properties: -- compatible: should be "boe,hv070wsa-100" -- power-supply: regulator to provide the VCC supply voltage (3.3 volts) -- enable-gpios: GPIO pin to enable and disable panel (active high) - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. - -The device node can contain one 'port' child node with one child -'endpoint' node, according to the bindings defined in [1]. This -node should describe panel's video bus. - -[1]: Documentation/devicetree/bindings/media/video-interfaces.txt - -Example: - - panel: panel { - compatible = "boe,hv070wsa-100"; - power-supply = <&vcc_3v3_reg>; - enable-gpios = <&gpd1 3 GPIO_ACTIVE_HIGH>; - port { - panel_ep: endpoint { - remote-endpoint = <&bridge_out_ep>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/display/panel/boe,nv101wxmn51.txt b/Documentation/devicetree/bindings/display/panel/boe,nv101wxmn51.txt deleted file mode 100644 index b258d6a91ec6..000000000000 --- a/Documentation/devicetree/bindings/display/panel/boe,nv101wxmn51.txt +++ /dev/null @@ -1,7 +0,0 @@ -BOE OPTOELECTRONICS TECHNOLOGY 10.1" WXGA TFT LCD panel - -Required properties: -- compatible: should be "boe,nv101wxmn51" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/boe,tv080wum-nl0.txt b/Documentation/devicetree/bindings/display/panel/boe,tv080wum-nl0.txt deleted file mode 100644 index 50be5e2438b2..000000000000 --- a/Documentation/devicetree/bindings/display/panel/boe,tv080wum-nl0.txt +++ /dev/null @@ -1,7 +0,0 @@ -Boe Corporation 8.0" WUXGA TFT LCD panel - -Required properties: -- compatible: should be "boe,tv080wum-nl0" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/cdtech,s043wq26h-ct7.txt b/Documentation/devicetree/bindings/display/panel/cdtech,s043wq26h-ct7.txt deleted file mode 100644 index 057f7f3f6dbe..000000000000 --- a/Documentation/devicetree/bindings/display/panel/cdtech,s043wq26h-ct7.txt +++ /dev/null @@ -1,12 +0,0 @@ -CDTech(H.K.) Electronics Limited 4.3" 480x272 color TFT-LCD panel - -Required properties: -- compatible: should be "cdtech,s043wq26h-ct7" -- power-supply: as specified in the base binding - -Optional properties: -- backlight: as specified in the base binding -- enable-gpios: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/cdtech,s070wv95-ct16.txt b/Documentation/devicetree/bindings/display/panel/cdtech,s070wv95-ct16.txt deleted file mode 100644 index 505615dfa0df..000000000000 --- a/Documentation/devicetree/bindings/display/panel/cdtech,s070wv95-ct16.txt +++ /dev/null @@ -1,12 +0,0 @@ -CDTech(H.K.) Electronics Limited 7" 800x480 color TFT-LCD panel - -Required properties: -- compatible: should be "cdtech,s070wv95-ct16" -- power-supply: as specified in the base binding - -Optional properties: -- backlight: as specified in the base binding -- enable-gpios: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/chunghwa,claa070wp03xg.txt b/Documentation/devicetree/bindings/display/panel/chunghwa,claa070wp03xg.txt deleted file mode 100644 index dd22685d2adc..000000000000 --- a/Documentation/devicetree/bindings/display/panel/chunghwa,claa070wp03xg.txt +++ /dev/null @@ -1,7 +0,0 @@ -Chunghwa Picture Tubes Ltd. 7" WXGA TFT LCD panel - -Required properties: -- compatible: should be "chunghwa,claa070wp03xg" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/chunghwa,claa101wa01a.txt b/Documentation/devicetree/bindings/display/panel/chunghwa,claa101wa01a.txt deleted file mode 100644 index f24614e4d5ec..000000000000 --- a/Documentation/devicetree/bindings/display/panel/chunghwa,claa101wa01a.txt +++ /dev/null @@ -1,7 +0,0 @@ -Chunghwa Picture Tubes Ltd. 10.1" WXGA TFT LCD panel - -Required properties: -- compatible: should be "chunghwa,claa101wa01a" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/chunghwa,claa101wb03.txt b/Documentation/devicetree/bindings/display/panel/chunghwa,claa101wb03.txt deleted file mode 100644 index 0ab2c05a4c22..000000000000 --- a/Documentation/devicetree/bindings/display/panel/chunghwa,claa101wb03.txt +++ /dev/null @@ -1,7 +0,0 @@ -Chunghwa Picture Tubes Ltd. 10.1" WXGA TFT LCD panel - -Required properties: -- compatible: should be "chunghwa,claa101wb03" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/dataimage,scf0700c48ggu18.txt b/Documentation/devicetree/bindings/display/panel/dataimage,scf0700c48ggu18.txt deleted file mode 100644 index 897085ee3cd4..000000000000 --- a/Documentation/devicetree/bindings/display/panel/dataimage,scf0700c48ggu18.txt +++ /dev/null @@ -1,8 +0,0 @@ -DataImage, Inc. 7" WVGA (800x480) TFT LCD panel with 24-bit parallel interface. - -Required properties: -- compatible: should be "dataimage,scf0700c48ggu18" -- power-supply: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/dlc,dlc1010gig.txt b/Documentation/devicetree/bindings/display/panel/dlc,dlc1010gig.txt deleted file mode 100644 index fbf5dcd15661..000000000000 --- a/Documentation/devicetree/bindings/display/panel/dlc,dlc1010gig.txt +++ /dev/null @@ -1,12 +0,0 @@ -DLC Display Co. DLC1010GIG 10.1" WXGA TFT LCD Panel - -Required properties: -- compatible: should be "dlc,dlc1010gig" -- power-supply: See simple-panel.txt - -Optional properties: -- enable-gpios: See simple-panel.txt -- backlight: See simple-panel.txt - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/edt,et-series.txt b/Documentation/devicetree/bindings/display/panel/edt,et-series.txt deleted file mode 100644 index b7ac1c725f97..000000000000 --- a/Documentation/devicetree/bindings/display/panel/edt,et-series.txt +++ /dev/null @@ -1,55 +0,0 @@ -Emerging Display Technology Corp. Displays -========================================== - - -Display bindings for EDT Display Technology Corp. Displays which are -compatible with the simple-panel binding, which is specified in -simple-panel.txt - -3,5" QVGA TFT Panels --------------------- -+-----------------+---------------------+-------------------------------------+ -| Identifier | compatbile | description | -+=================+=====================+=====================================+ -| ET035012DM6 | edt,et035012dm6 | 3.5" QVGA TFT LCD panel | -+-----------------+---------------------+-------------------------------------+ - -4,3" WVGA TFT Panels --------------------- - -+-----------------+---------------------+-------------------------------------+ -| Identifier | compatbile | description | -+=================+=====================+=====================================+ -| ETM0430G0DH6 | edt,etm0430g0dh6 | 480x272 TFT Display | -+-----------------+---------------------+-------------------------------------+ - -5,7" WVGA TFT Panels --------------------- - -+-----------------+---------------------+-------------------------------------+ -| Identifier | compatbile | description | -+=================+=====================+=====================================+ -| ET057090DHU | edt,et057090dhu | 5.7" VGA TFT LCD panel | -+-----------------+---------------------+-------------------------------------+ - - -7,0" WVGA TFT Panels --------------------- - -+-----------------+---------------------+-------------------------------------+ -| Identifier | compatbile | description | -+=================+=====================+=====================================+ -| ETM0700G0DH6 | edt,etm070080dh6 | WVGA TFT Display with capacitive | -| | edt,etm0700g0dh6 | Touchscreen | -+-----------------+---------------------+-------------------------------------+ -| ETM0700G0BDH6 | edt,etm070080bdh6 | Same as ETM0700G0DH6 but with | -| | | inverted pixel clock. | -+-----------------+---------------------+-------------------------------------+ -| ETM0700G0EDH6 | edt,etm070080edh6 | Same display as the ETM0700G0BDH6, | -| | | but with changed Hardware for the | -| | | backlight and the touch interface | -+-----------------+---------------------+-------------------------------------+ -| ET070080DH6 | edt,etm070080dh6 | Same timings as the ETM0700G0DH6, | -| | | but with resistive touch. | -+-----------------+---------------------+-------------------------------------+ - diff --git a/Documentation/devicetree/bindings/display/panel/evervision,vgg804821.txt b/Documentation/devicetree/bindings/display/panel/evervision,vgg804821.txt deleted file mode 100644 index 82d22e191ac3..000000000000 --- a/Documentation/devicetree/bindings/display/panel/evervision,vgg804821.txt +++ /dev/null @@ -1,12 +0,0 @@ -Evervision Electronics Co. Ltd. VGG804821 5.0" WVGA TFT LCD Panel - -Required properties: -- compatible: should be "evervision,vgg804821" -- power-supply: See simple-panel.txt - -Optional properties: -- backlight: See simple-panel.txt -- enable-gpios: See simple-panel.txt - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/foxlink,fl500wvr00-a0t.txt b/Documentation/devicetree/bindings/display/panel/foxlink,fl500wvr00-a0t.txt deleted file mode 100644 index b47f9d87bc19..000000000000 --- a/Documentation/devicetree/bindings/display/panel/foxlink,fl500wvr00-a0t.txt +++ /dev/null @@ -1,7 +0,0 @@ -Foxlink Group 5" WVGA TFT LCD panel - -Required properties: -- compatible: should be "foxlink,fl500wvr00-a0t" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/friendlyarm,hd702e.txt b/Documentation/devicetree/bindings/display/panel/friendlyarm,hd702e.txt deleted file mode 100644 index 6c9156fc3478..000000000000 --- a/Documentation/devicetree/bindings/display/panel/friendlyarm,hd702e.txt +++ /dev/null @@ -1,32 +0,0 @@ -FriendlyELEC HD702E 800x1280 LCD panel - -HD702E lcd is FriendlyELEC developed eDP LCD panel with 800x1280 -resolution. It has built in Goodix, GT9271 captive touchscreen -with backlight adjustable via PWM. - -Required properties: -- compatible: should be "friendlyarm,hd702e" -- power-supply: regulator to provide the supply voltage - -Optional properties: -- backlight: phandle of the backlight device attached to the panel - -Optional nodes: -- Video port for LCD panel input. - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. - -Example: - - panel { - compatible ="friendlyarm,hd702e", "simple-panel"; - backlight = <&backlight>; - power-supply = <&vcc3v3_sys>; - - port { - panel_in_edp: endpoint { - remote-endpoint = <&edp_out_panel>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/display/panel/giantplus,gpg482739qs5.txt b/Documentation/devicetree/bindings/display/panel/giantplus,gpg482739qs5.txt deleted file mode 100644 index 24b0b624434b..000000000000 --- a/Documentation/devicetree/bindings/display/panel/giantplus,gpg482739qs5.txt +++ /dev/null @@ -1,7 +0,0 @@ -GiantPlus GPG48273QS5 4.3" (480x272) WQVGA TFT LCD panel - -Required properties: -- compatible: should be "giantplus,gpg48273qs5" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/hannstar,hsd070pww1.txt b/Documentation/devicetree/bindings/display/panel/hannstar,hsd070pww1.txt deleted file mode 100644 index 7da1d5c038ff..000000000000 --- a/Documentation/devicetree/bindings/display/panel/hannstar,hsd070pww1.txt +++ /dev/null @@ -1,7 +0,0 @@ -HannStar Display Corp. HSD070PWW1 7.0" WXGA TFT LCD panel - -Required properties: -- compatible: should be "hannstar,hsd070pww1" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/hannstar,hsd100pxn1.txt b/Documentation/devicetree/bindings/display/panel/hannstar,hsd100pxn1.txt deleted file mode 100644 index 8270319a99de..000000000000 --- a/Documentation/devicetree/bindings/display/panel/hannstar,hsd100pxn1.txt +++ /dev/null @@ -1,7 +0,0 @@ -HannStar Display Corp. HSD100PXN1 10.1" XGA LVDS panel - -Required properties: -- compatible: should be "hannstar,hsd100pxn1" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/hit,tx23d38vm0caa.txt b/Documentation/devicetree/bindings/display/panel/hit,tx23d38vm0caa.txt deleted file mode 100644 index 04caaae19af6..000000000000 --- a/Documentation/devicetree/bindings/display/panel/hit,tx23d38vm0caa.txt +++ /dev/null @@ -1,7 +0,0 @@ -Hitachi Ltd. Corporation 9" WVGA (800x480) TFT LCD panel - -Required properties: -- compatible: should be "hit,tx23d38vm0caa" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/innolux,at043tn24.txt b/Documentation/devicetree/bindings/display/panel/innolux,at043tn24.txt deleted file mode 100644 index 4104226b61bc..000000000000 --- a/Documentation/devicetree/bindings/display/panel/innolux,at043tn24.txt +++ /dev/null @@ -1,7 +0,0 @@ -Innolux AT043TN24 4.3" WQVGA TFT LCD panel - -Required properties: -- compatible: should be "innolux,at043tn24" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/innolux,at070tn92.txt b/Documentation/devicetree/bindings/display/panel/innolux,at070tn92.txt deleted file mode 100644 index 3e10cd782491..000000000000 --- a/Documentation/devicetree/bindings/display/panel/innolux,at070tn92.txt +++ /dev/null @@ -1,7 +0,0 @@ -Innolux AT070TN92 7.0" WQVGA TFT LCD panel - -Required properties: -- compatible: should be "innolux,at070tn92" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/innolux,g070y2-l01.txt b/Documentation/devicetree/bindings/display/panel/innolux,g070y2-l01.txt deleted file mode 100644 index 7c234cf68e11..000000000000 --- a/Documentation/devicetree/bindings/display/panel/innolux,g070y2-l01.txt +++ /dev/null @@ -1,12 +0,0 @@ -Innolux G070Y2-L01 7" WVGA (800x480) TFT LCD panel - -Required properties: -- compatible: should be "innolux,g070y2-l01" -- power-supply: as specified in the base binding - -Optional properties: -- backlight: as specified in the base binding -- enable-gpios: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/innolux,g101ice-l01.txt b/Documentation/devicetree/bindings/display/panel/innolux,g101ice-l01.txt deleted file mode 100644 index 9e7590465227..000000000000 --- a/Documentation/devicetree/bindings/display/panel/innolux,g101ice-l01.txt +++ /dev/null @@ -1,7 +0,0 @@ -Innolux Corporation 10.1" G101ICE-L01 WXGA (1280x800) LVDS panel - -Required properties: -- compatible: should be "innolux,g101ice-l01" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/innolux,g121i1-l01.txt b/Documentation/devicetree/bindings/display/panel/innolux,g121i1-l01.txt deleted file mode 100644 index 2743b07cd2f2..000000000000 --- a/Documentation/devicetree/bindings/display/panel/innolux,g121i1-l01.txt +++ /dev/null @@ -1,7 +0,0 @@ -Innolux Corporation 12.1" WXGA (1280x800) TFT LCD panel - -Required properties: -- compatible: should be "innolux,g121i1-l01" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/innolux,g121x1-l03.txt b/Documentation/devicetree/bindings/display/panel/innolux,g121x1-l03.txt deleted file mode 100644 index 649744620ae1..000000000000 --- a/Documentation/devicetree/bindings/display/panel/innolux,g121x1-l03.txt +++ /dev/null @@ -1,7 +0,0 @@ -Innolux Corporation 12.1" G121X1-L03 XGA (1024x768) TFT LCD panel - -Required properties: -- compatible: should be "innolux,g121x1-l03" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/innolux,n116bge.txt b/Documentation/devicetree/bindings/display/panel/innolux,n116bge.txt deleted file mode 100644 index 081bb939ed31..000000000000 --- a/Documentation/devicetree/bindings/display/panel/innolux,n116bge.txt +++ /dev/null @@ -1,7 +0,0 @@ -Innolux Corporation 11.6" WXGA (1366x768) TFT LCD panel - -Required properties: -- compatible: should be "innolux,n116bge" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/innolux,n156bge-l21.txt b/Documentation/devicetree/bindings/display/panel/innolux,n156bge-l21.txt deleted file mode 100644 index 7825844aafdf..000000000000 --- a/Documentation/devicetree/bindings/display/panel/innolux,n156bge-l21.txt +++ /dev/null @@ -1,7 +0,0 @@ -InnoLux 15.6" WXGA TFT LCD panel - -Required properties: -- compatible: should be "innolux,n156bge-l21" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/innolux,zj070na-01p.txt b/Documentation/devicetree/bindings/display/panel/innolux,zj070na-01p.txt deleted file mode 100644 index 824f87f1526d..000000000000 --- a/Documentation/devicetree/bindings/display/panel/innolux,zj070na-01p.txt +++ /dev/null @@ -1,7 +0,0 @@ -Innolux Corporation 7.0" WSVGA (1024x600) TFT LCD panel - -Required properties: -- compatible: should be "innolux,zj070na-01p" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/koe,tx14d24vm1bpa.txt b/Documentation/devicetree/bindings/display/panel/koe,tx14d24vm1bpa.txt deleted file mode 100644 index be7ac666807b..000000000000 --- a/Documentation/devicetree/bindings/display/panel/koe,tx14d24vm1bpa.txt +++ /dev/null @@ -1,42 +0,0 @@ -Kaohsiung Opto-Electronics Inc. 5.7" QVGA (320 x 240) TFT LCD panel - -Required properties: -- compatible: should be "koe,tx14d24vm1bpa" -- backlight: phandle of the backlight device attached to the panel -- power-supply: single regulator to provide the supply voltage - -Required nodes: -- port: Parallel port mapping to connect this display - -This panel needs single power supply voltage. Its backlight is conntrolled -via PWM signal. - -Example: --------- - -Example device-tree definition when connected to iMX53 based board - - lcd_panel: lcd-panel { - compatible = "koe,tx14d24vm1bpa"; - backlight = <&backlight_lcd>; - power-supply = <®_3v3>; - - port { - lcd_panel_in: endpoint { - remote-endpoint = <&lcd_display_out>; - }; - }; - }; - -Then one needs to extend the dispX node: - - lcd_display: disp1 { - - port@1 { - reg = <1>; - - lcd_display_out: endpoint { - remote-endpoint = <&lcd_panel_in>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/display/panel/koe,tx31d200vm0baa.txt b/Documentation/devicetree/bindings/display/panel/koe,tx31d200vm0baa.txt deleted file mode 100644 index 6a036ede3e28..000000000000 --- a/Documentation/devicetree/bindings/display/panel/koe,tx31d200vm0baa.txt +++ /dev/null @@ -1,25 +0,0 @@ -Kaohsiung Opto-Electronics. TX31D200VM0BAA 12.3" HSXGA LVDS panel - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. - -Required properties: -- compatible: should be "koe,tx31d200vm0baa" - -Optional properties: -- backlight: phandle of the backlight device attached to the panel - -Optional nodes: -- Video port for LVDS panel input. - -Example: - panel { - compatible = "koe,tx31d200vm0baa"; - backlight = <&backlight_lvds>; - - port { - panel_in: endpoint { - remote-endpoint = <&lvds0_out>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/display/panel/kyo,tcg121xglp.txt b/Documentation/devicetree/bindings/display/panel/kyo,tcg121xglp.txt deleted file mode 100644 index a8e940fe731e..000000000000 --- a/Documentation/devicetree/bindings/display/panel/kyo,tcg121xglp.txt +++ /dev/null @@ -1,7 +0,0 @@ -Kyocera Corporation 12.1" XGA (1024x768) TFT LCD panel - -Required properties: -- compatible: should be "kyo,tcg121xglp" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/lemaker,bl035-rgb-002.txt b/Documentation/devicetree/bindings/display/panel/lemaker,bl035-rgb-002.txt deleted file mode 100644 index 74ee7ea6b493..000000000000 --- a/Documentation/devicetree/bindings/display/panel/lemaker,bl035-rgb-002.txt +++ /dev/null @@ -1,12 +0,0 @@ -LeMaker BL035-RGB-002 3.5" QVGA TFT LCD panel - -Required properties: -- compatible: should be "lemaker,bl035-rgb-002" -- power-supply: as specified in the base binding - -Optional properties: -- backlight: as specified in the base binding -- enable-gpios: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/lg,lb070wv8.txt b/Documentation/devicetree/bindings/display/panel/lg,lb070wv8.txt deleted file mode 100644 index a7588e5259cf..000000000000 --- a/Documentation/devicetree/bindings/display/panel/lg,lb070wv8.txt +++ /dev/null @@ -1,7 +0,0 @@ -LG 7" (800x480 pixels) TFT LCD panel - -Required properties: -- compatible: should be "lg,lb070wv8" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/lg,lp079qx1-sp0v.txt b/Documentation/devicetree/bindings/display/panel/lg,lp079qx1-sp0v.txt deleted file mode 100644 index b9877acad012..000000000000 --- a/Documentation/devicetree/bindings/display/panel/lg,lp079qx1-sp0v.txt +++ /dev/null @@ -1,7 +0,0 @@ -LG LP079QX1-SP0V 7.9" (1536x2048 pixels) TFT LCD panel - -Required properties: -- compatible: should be "lg,lp079qx1-sp0v" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/lg,lp097qx1-spa1.txt b/Documentation/devicetree/bindings/display/panel/lg,lp097qx1-spa1.txt deleted file mode 100644 index 42141516f078..000000000000 --- a/Documentation/devicetree/bindings/display/panel/lg,lp097qx1-spa1.txt +++ /dev/null @@ -1,7 +0,0 @@ -LG 9.7" (2048x1536 pixels) TFT LCD panel - -Required properties: -- compatible: should be "lg,lp097qx1-spa1" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/lg,lp120up1.txt b/Documentation/devicetree/bindings/display/panel/lg,lp120up1.txt deleted file mode 100644 index 8c5de692c55c..000000000000 --- a/Documentation/devicetree/bindings/display/panel/lg,lp120up1.txt +++ /dev/null @@ -1,7 +0,0 @@ -LG 12.0" (1920x1280 pixels) TFT LCD panel - -Required properties: -- compatible: should be "lg,lp120up1" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/lg,lp129qe.txt b/Documentation/devicetree/bindings/display/panel/lg,lp129qe.txt deleted file mode 100644 index 9f262e0c5a2e..000000000000 --- a/Documentation/devicetree/bindings/display/panel/lg,lp129qe.txt +++ /dev/null @@ -1,7 +0,0 @@ -LG 12.9" (2560x1700 pixels) TFT LCD panel - -Required properties: -- compatible: should be "lg,lp129qe" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/mitsubishi,aa070mc01.txt b/Documentation/devicetree/bindings/display/panel/mitsubishi,aa070mc01.txt deleted file mode 100644 index 7d8f6eeef6d9..000000000000 --- a/Documentation/devicetree/bindings/display/panel/mitsubishi,aa070mc01.txt +++ /dev/null @@ -1,7 +0,0 @@ -Mitsubishi "AA070MC01 7.0" WVGA TFT LCD panel - -Required properties: -- compatible: should be "mitsubishi,aa070mc01-ca1" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/nec,nl12880b20-05.txt b/Documentation/devicetree/bindings/display/panel/nec,nl12880b20-05.txt deleted file mode 100644 index 71cbc49ecfab..000000000000 --- a/Documentation/devicetree/bindings/display/panel/nec,nl12880b20-05.txt +++ /dev/null @@ -1,8 +0,0 @@ -NEC LCD Technologies, Ltd. 12.1" WXGA (1280x800) LVDS TFT LCD panel - -Required properties: -- compatible: should be "nec,nl12880bc20-05" -- power-supply: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/nec,nl4827hc19-05b.txt b/Documentation/devicetree/bindings/display/panel/nec,nl4827hc19-05b.txt deleted file mode 100644 index 8e1914d1edb8..000000000000 --- a/Documentation/devicetree/bindings/display/panel/nec,nl4827hc19-05b.txt +++ /dev/null @@ -1,7 +0,0 @@ -NEC LCD Technologies,Ltd. WQVGA TFT LCD panel - -Required properties: -- compatible: should be "nec,nl4827hc19-05b" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/netron-dy,e231732.txt b/Documentation/devicetree/bindings/display/panel/netron-dy,e231732.txt deleted file mode 100644 index c6d06b5eab51..000000000000 --- a/Documentation/devicetree/bindings/display/panel/netron-dy,e231732.txt +++ /dev/null @@ -1,7 +0,0 @@ -Netron-DY E231732 7.0" WSVGA TFT LCD panel - -Required properties: -- compatible: should be "netron-dy,e231732" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/newhaven,nhd-4.3-480272ef-atxl.txt b/Documentation/devicetree/bindings/display/panel/newhaven,nhd-4.3-480272ef-atxl.txt deleted file mode 100644 index e78292b1a131..000000000000 --- a/Documentation/devicetree/bindings/display/panel/newhaven,nhd-4.3-480272ef-atxl.txt +++ /dev/null @@ -1,7 +0,0 @@ -Newhaven Display International 480 x 272 TFT LCD panel - -Required properties: -- compatible: should be "newhaven,nhd-4.3-480272ef-atxl" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/nlt,nl192108ac18-02d.txt b/Documentation/devicetree/bindings/display/panel/nlt,nl192108ac18-02d.txt deleted file mode 100644 index 1a639fd8778d..000000000000 --- a/Documentation/devicetree/bindings/display/panel/nlt,nl192108ac18-02d.txt +++ /dev/null @@ -1,8 +0,0 @@ -NLT Technologies, Ltd. 15.6" FHD (1920x1080) LVDS TFT LCD panel - -Required properties: -- compatible: should be "nlt,nl192108ac18-02d" -- power-supply: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/nvd,9128.txt b/Documentation/devicetree/bindings/display/panel/nvd,9128.txt deleted file mode 100644 index 17bcd017c678..000000000000 --- a/Documentation/devicetree/bindings/display/panel/nvd,9128.txt +++ /dev/null @@ -1,7 +0,0 @@ -New Vision Display 7.0" 800 RGB x 480 TFT LCD panel - -Required properties: -- compatible: should be "nvd,9128" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/okaya,rs800480t-7x0gp.txt b/Documentation/devicetree/bindings/display/panel/okaya,rs800480t-7x0gp.txt deleted file mode 100644 index ddf8e211d382..000000000000 --- a/Documentation/devicetree/bindings/display/panel/okaya,rs800480t-7x0gp.txt +++ /dev/null @@ -1,7 +0,0 @@ -OKAYA Electric America, Inc. RS800480T-7X0GP 7" WVGA LCD panel - -Required properties: -- compatible: should be "okaya,rs800480t-7x0gp" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino-43-ts.txt b/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino-43-ts.txt deleted file mode 100644 index 74540a090669..000000000000 --- a/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino-43-ts.txt +++ /dev/null @@ -1,7 +0,0 @@ -Olimex 4.3" TFT LCD panel - -Required properties: -- compatible: should be "olimex,lcd-olinuxino-43-ts" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/ontat,yx700wv03.txt b/Documentation/devicetree/bindings/display/panel/ontat,yx700wv03.txt deleted file mode 100644 index 3d8a5e029242..000000000000 --- a/Documentation/devicetree/bindings/display/panel/ontat,yx700wv03.txt +++ /dev/null @@ -1,7 +0,0 @@ -On Tat Industrial Company 7" DPI TFT panel. - -Required properties: -- compatible: should be "ontat,yx700wv03" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m05dtc.txt b/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m05dtc.txt deleted file mode 100644 index c16907c02f80..000000000000 --- a/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m05dtc.txt +++ /dev/null @@ -1,12 +0,0 @@ -OrtusTech COM37H3M05DTC Blanview 3.7" VGA portrait TFT-LCD panel - -Required properties: -- compatible: should be "ortustech,com37h3m05dtc" - -Optional properties: -- enable-gpios: GPIO pin to enable or disable the panel -- backlight: phandle of the backlight device attached to the panel -- power-supply: phandle of the regulator that provides the supply voltage - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m99dtc.txt b/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m99dtc.txt deleted file mode 100644 index 06a73c3f46b5..000000000000 --- a/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m99dtc.txt +++ /dev/null @@ -1,12 +0,0 @@ -OrtusTech COM37H3M99DTC Blanview 3.7" VGA portrait TFT-LCD panel - -Required properties: -- compatible: should be "ortustech,com37h3m99dtc" - -Optional properties: -- enable-gpios: GPIO pin to enable or disable the panel -- backlight: phandle of the backlight device attached to the panel -- power-supply: phandle of the regulator that provides the supply voltage - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/ortustech,com43h4m85ulc.txt b/Documentation/devicetree/bindings/display/panel/ortustech,com43h4m85ulc.txt deleted file mode 100644 index de19e9398618..000000000000 --- a/Documentation/devicetree/bindings/display/panel/ortustech,com43h4m85ulc.txt +++ /dev/null @@ -1,7 +0,0 @@ -OrtusTech COM43H4M85ULC Blanview 3.7" TFT-LCD panel - -Required properties: -- compatible: should be "ortustech,com43h4m85ulc" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/osddisplays,osd070t1718-19ts.txt b/Documentation/devicetree/bindings/display/panel/osddisplays,osd070t1718-19ts.txt deleted file mode 100644 index e57883ccdf2f..000000000000 --- a/Documentation/devicetree/bindings/display/panel/osddisplays,osd070t1718-19ts.txt +++ /dev/null @@ -1,12 +0,0 @@ -OSD Displays OSD070T1718-19TS 7" WVGA TFT LCD panel - -Required properties: -- compatible: shall be "osddisplays,osd070t1718-19ts" -- power-supply: see simple-panel.txt - -Optional properties: -- backlight: see simple-panel.txt - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. No other simple-panel properties than -the ones specified herein are valid. diff --git a/Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2045-53ts.txt b/Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2045-53ts.txt deleted file mode 100644 index 85c0b2cacfda..000000000000 --- a/Documentation/devicetree/bindings/display/panel/osddisplays,osd101t2045-53ts.txt +++ /dev/null @@ -1,11 +0,0 @@ -One Stop Displays OSD101T2045-53TS 10.1" 1920x1200 panel - -Required properties: -- compatible: should be "osddisplays,osd101t2045-53ts" -- power-supply: as specified in the base binding - -Optional properties: -- backlight: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/panasonic,vvx10f004b00.txt b/Documentation/devicetree/bindings/display/panel/panasonic,vvx10f004b00.txt deleted file mode 100644 index d328b0341bf4..000000000000 --- a/Documentation/devicetree/bindings/display/panel/panasonic,vvx10f004b00.txt +++ /dev/null @@ -1,7 +0,0 @@ -Panasonic Corporation 10.1" WUXGA TFT LCD panel - -Required properties: -- compatible: should be "panasonic,vvx10f004b00" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index 4a8064e31793..4802ec18b927 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -33,18 +33,213 @@ properties: - ampire,am-480272h3tmqw-t01h # Ampire AM-800480R3TMQW-A1H 7.0" WVGA TFT LCD panel - ampire,am800480r3tmqwa1h + # AU Optronics Corporation 8.0" WUXGA TFT LCD panel + - auo,b080uan01 + # AU Optronics Corporation 10.1" WSVGA TFT LCD panel + - auo,b101aw03 + # AU Optronics Corporation 10.1" WSVGA TFT LCD panel + - auo,b101ean01 + # AU Optronics Corporation 10.1" WXGA TFT LCD panel + - auo,b101xtn01 # AUO B116XAK01 eDP TFT LCD panel - auo,b116xa01 + # AU Optronics Corporation 11.6" HD (1366x768) color TFT-LCD panel + - auo,b116xw03 + # AU Optronics Corporation 13.3" FHD (1920x1080) color TFT-LCD panel + - auo,b133htn01 + # AU Optronics Corporation 13.3" WXGA (1366x768) TFT LCD panel + - auo,b133xtn01 + # AU Optronics Corporation 7.0" FHD (800 x 480) TFT LCD panel + - auo,g070vvn01 + # AU Optronics Corporation 10.1" (1280x800) color TFT LCD panel + - auo,g101evn010 + # AU Optronics Corporation 10.4" (800x600) color TFT LCD panel + - auo,g104sn02 + # AU Optronics Corporation 13.3" FHD (1920x1080) TFT LCD panel + - auo,g133han01 + # AU Optronics Corporation 18.5" FHD (1920x1080) TFT LCD panel + - auo,g185han01 + # AU Optronics Corporation 31.5" FHD (1920x1080) TFT LCD panel + - auo,p320hvn03 + # AU Optronics Corporation 21.5" FHD (1920x1080) color TFT LCD panel + - auo,t215hvn01 + # Shanghai AVIC Optoelectronics 7" 1024x600 color TFT-LCD panel + - avic,tm070ddh03 + # BOE HV070WSA-100 7.01" WSVGA TFT LCD panel + - boe,hv070wsa-100 + # BOE OPTOELECTRONICS TECHNOLOGY 10.1" WXGA TFT LCD panel + - boe,nv101wxmn51 # BOE NV140FHM-N49 14.0" FHD a-Si FT panel - boe,nv140fhmn49 + # Boe Corporation 8.0" WUXGA TFT LCD panel + - boe,tv080wum-nl0 + # CDTech(H.K.) Electronics Limited 4.3" 480x272 color TFT-LCD panel + - cdtech,s043wq26h-ct7 + # CDTech(H.K.) Electronics Limited 7" 800x480 color TFT-LCD panel + - cdtech,s070wv95-ct16 + # Chunghwa Picture Tubes Ltd. 7" WXGA TFT LCD panel + - chunghwa,claa070wp03xg + # Chunghwa Picture Tubes Ltd. 10.1" WXGA TFT LCD panel + - chunghwa,claa101wa01a + # Chunghwa Picture Tubes Ltd. 10.1" WXGA TFT LCD panel + - chunghwa,claa101wb03 + # DataImage, Inc. 7" WVGA (800x480) TFT LCD panel with 24-bit parallel interface. + - dataimage,scf0700c48ggu18 + # DLC Display Co. DLC1010GIG 10.1" WXGA TFT LCD Panel + - dlc,dlc1010gig + # Emerging Display Technology Corp. 3.5" QVGA TFT LCD panel + - edt,et035012dm6 + # Emerging Display Technology Corp. 480x272 TFT Display + - edt,etm0430g0dh6 + # Emerging Display Technology Corp. 5.7" VGA TFT LCD panel + - edt,et057090dhu + # Emerging Display Technology Corp. WVGA TFT Display with capacitive touch + - edt,etm070080dh6 + # Emerging Display Technology Corp. WVGA TFT Display with capacitive touch + - edt,etm0700g0dh6 + # Emerging Display Technology Corp. WVGA TFT Display with capacitive touch + # Same as ETM0700G0DH6 but with inverted pixel clock. + - edt,etm070080bdh6 + # Emerging Display Technology Corp. WVGA TFT Display with capacitive touch + # Same display as the ETM0700G0BDH6, but with changed hardware for the + # backlight and the touch interface. + - edt,etm070080edh6 + # Emerging Display Technology Corp. WVGA TFT Display with capacitive touch + # Same timings as the ETM0700G0DH6, but with resistive touch. + - edt,etm070080dh6 + # Evervision Electronics Co. Ltd. VGG804821 5.0" WVGA TFT LCD Panel + - evervision,vgg804821 + # Foxlink Group 5" WVGA TFT LCD panel + - foxlink,fl500wvr00-a0t # Frida FRD350H54004 3.5" QVGA TFT LCD panel - frida,frd350h54004 + # FriendlyELEC HD702E 800x1280 LCD panel + - friendlyarm,hd702e + # GiantPlus GPG48273QS5 4.3" (480x272) WQVGA TFT LCD panel + - giantplus,gpg48273qs5 # GiantPlus GPM940B0 3.0" QVGA TFT LCD panel - giantplus,gpm940b0 + # HannStar Display Corp. HSD070PWW1 7.0" WXGA TFT LCD panel + - hannstar,hsd070pww1 + # HannStar Display Corp. HSD100PXN1 10.1" XGA LVDS panel + - hannstar,hsd100pxn1 + # Hitachi Ltd. Corporation 9" WVGA (800x480) TFT LCD panel + - hit,tx23d38vm0caa + # Innolux AT043TN24 4.3" WQVGA TFT LCD panel + - innolux,at043tn24 + # Innolux AT070TN92 7.0" WQVGA TFT LCD panel + - innolux,at070tn92 + # Innolux G070Y2-L01 7" WVGA (800x480) TFT LCD panel + - innolux,g070y2-l01 + # Innolux Corporation 10.1" G101ICE-L01 WXGA (1280x800) LVDS panel + - innolux,g101ice-l01 + # Innolux Corporation 12.1" WXGA (1280x800) TFT LCD panel + - innolux,g121i1-l01 + # Innolux Corporation 12.1" G121X1-L03 XGA (1024x768) TFT LCD panel + - innolux,g121x1-l03 + # Innolux Corporation 11.6" WXGA (1366x768) TFT LCD panel + - innolux,n116bge + # InnoLux 15.6" WXGA TFT LCD panel + - innolux,n156bge-l21 + # Innolux Corporation 7.0" WSVGA (1024x600) TFT LCD panel + - innolux,zj070na-01p + # Kaohsiung Opto-Electronics Inc. 5.7" QVGA (320 x 240) TFT LCD panel + - koe,tx14d24vm1bpa + # Kaohsiung Opto-Electronics. TX31D200VM0BAA 12.3" HSXGA LVDS panel + - koe,tx31d200vm0baa + # Kyocera Corporation 12.1" XGA (1024x768) TFT LCD panel + - kyo,tcg121xglp + # LeMaker BL035-RGB-002 3.5" QVGA TFT LCD panel + - lemaker,bl035-rgb-002 + # LG 7" (800x480 pixels) TFT LCD panel + - lg,lb070wv8 + # LG LP079QX1-SP0V 7.9" (1536x2048 pixels) TFT LCD panel + - lg,lp079qx1-sp0v + # LG 9.7" (2048x1536 pixels) TFT LCD panel + - lg,lp097qx1-spa1 + # LG 12.0" (1920x1280 pixels) TFT LCD panel + - lg,lp120up1 + # LG 12.9" (2560x1700 pixels) TFT LCD panel + - lg,lp129qe + # Mitsubishi "AA070MC01 7.0" WVGA TFT LCD panel + - mitsubishi,aa070mc01-ca1 + # NEC LCD Technologies, Ltd. 12.1" WXGA (1280x800) LVDS TFT LCD panel + - nec,nl12880bc20-05 + # NEC LCD Technologies,Ltd. WQVGA TFT LCD panel + - nec,nl4827hc19-05b + # Netron-DY E231732 7.0" WSVGA TFT LCD panel + - netron-dy,e231732 + # Newhaven Display International 480 x 272 TFT LCD panel + - newhaven,nhd-4.3-480272ef-atxl + # NLT Technologies, Ltd. 15.6" FHD (1920x1080) LVDS TFT LCD panel + - nlt,nl192108ac18-02d + # New Vision Display 7.0" 800 RGB x 480 TFT LCD panel + - nvd,9128 + # OKAYA Electric America, Inc. RS800480T-7X0GP 7" WVGA LCD panel + - okaya,rs800480t-7x0gp + # Olimex 4.3" TFT LCD panel + - olimex,lcd-olinuxino-43-ts + # On Tat Industrial Company 7" DPI TFT panel. + - ontat,yx700wv03 + # OrtusTech COM37H3M05DTC Blanview 3.7" VGA portrait TFT-LCD panel + - ortustech,com37h3m05dtc + # OrtusTech COM37H3M99DTC Blanview 3.7" VGA portrait TFT-LCD panel + - ortustech,com37h3m99dtc + # OrtusTech COM43H4M85ULC Blanview 3.7" TFT-LCD panel + - ortustech,com43h4m85ulc + # OSD Displays OSD070T1718-19TS 7" WVGA TFT LCD panel + - osddisplays,osd070t1718-19ts + # One Stop Displays OSD101T2045-53TS 10.1" 1920x1200 panel + - osddisplays,osd101t2045-53ts + # QiaoDian XianShi Corporation 4"3 TFT LCD panel + - qiaodian,qd43003c0-40 + # Rocktech Display Ltd. RK070ER9427 800(RGB)x480 TFT LCD panel + - rocktech,rk070er9427 + # Samsung 12.2" (2560x1600 pixels) TFT LCD panel + - samsung,lsn122dl01-c01 + # Samsung Electronics 10.1" WSVGA TFT LCD panel + - samsung,ltn101nt05 + # Samsung Electronics 14" WXGA (1366x768) TFT LCD panel + - samsung,ltn140at29-301 # Satoz SAT050AT40H12R2 5.0" WVGA TFT LCD panel - satoz,sat050at40h12r2 + # Sharp LQ035Q7DB03 3.5" QVGA TFT LCD panel + - sharp,lq035q7db03 + # Sharp LQ070Y3DG3B 7.0" WVGA landscape TFT LCD panel + - sharp,lq070y3dg3b + # Sharp Display Corp. LQ101K1LY04 10.07" WXGA TFT LCD panel + - sharp,lq101k1ly04 + # Sharp 12.3" (2400x1600 pixels) TFT LCD panel + - sharp,lq123p1jx31 # Sharp LS020B1DD01D 2.0" HQVGA TFT LCD panel - sharp,ls020b1dd01d + # Shelly SCA07010-BFN-LNN 7.0" WVGA TFT LCD panel + - shelly,sca07010-bfn-lnn + # Starry 12.2" (1920x1200 pixels) TFT LCD panel + - starry,kr122ea0sra + # Tianma Micro-electronics TM070JDHG30 7.0" WXGA TFT LCD panel + - tianma,tm070jdhg30 + # Tianma Micro-electronics TM070RVHG71 7.0" WXGA TFT LCD panel + - tianma,tm070rvhg71 + # Toshiba 8.9" WXGA (1280x768) TFT LCD panel + - toshiba,lt089ac29000 + # TPK U.S.A. LLC Fusion 7" 800 x 480 (WVGA) LCD panel with capacitive touch + - tpk,f07a-0102 + # TPK U.S.A. LLC Fusion 10.1" 1024 x 600 (WSVGA) LCD panel with capacitive touch + - tpk,f10a-0102 + # United Radiant Technology UMSH-8596MD-xT 7.0" WVGA TFT LCD panel + # Supported are LVDS versions (-11T, -19T) and parallel ones + # (-T, -1T, -7T, -20T). + - urt,umsh-8596md-t + - urt,umsh-8596md-1t + - urt,umsh-8596md-7t + - urt,umsh-8596md-11t + - urt,umsh-8596md-19t + - urt,umsh-8596md-20t + # VXT 800x480 color TFT LCD panel + - vxt,vl050-8048nt-c01 + # Winstar Display Corporation 3.5" QVGA (320x240) TFT LCD panel + - winstar,wf35ltiacd backlight: true enable-gpios: true diff --git a/Documentation/devicetree/bindings/display/panel/qiaodian,qd43003c0-40.txt b/Documentation/devicetree/bindings/display/panel/qiaodian,qd43003c0-40.txt deleted file mode 100644 index 0fbdab89ac3d..000000000000 --- a/Documentation/devicetree/bindings/display/panel/qiaodian,qd43003c0-40.txt +++ /dev/null @@ -1,7 +0,0 @@ -QiaoDian XianShi Corporation 4"3 TFT LCD panel - -Required properties: -- compatible: should be "qiaodian,qd43003c0-40" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/rocktech,rk070er9427.txt b/Documentation/devicetree/bindings/display/panel/rocktech,rk070er9427.txt deleted file mode 100644 index eb1fb9f8d1f4..000000000000 --- a/Documentation/devicetree/bindings/display/panel/rocktech,rk070er9427.txt +++ /dev/null @@ -1,25 +0,0 @@ -Rocktech Display Ltd. RK070ER9427 800(RGB)x480 TFT LCD panel - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. - -Required properties: -- compatible: should be "rocktech,rk070er9427" - -Optional properties: -- backlight: phandle of the backlight device attached to the panel - -Optional nodes: -- Video port for LCD panel input. - -Example: - panel { - compatible = "rocktech,rk070er9427"; - backlight = <&backlight_lcd>; - - port { - lcd_panel_in: endpoint { - remote-endpoint = <&lcd_display_out>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/display/panel/samsung,lsn122dl01-c01.txt b/Documentation/devicetree/bindings/display/panel/samsung,lsn122dl01-c01.txt deleted file mode 100644 index dba298b43b24..000000000000 --- a/Documentation/devicetree/bindings/display/panel/samsung,lsn122dl01-c01.txt +++ /dev/null @@ -1,7 +0,0 @@ -Samsung 12.2" (2560x1600 pixels) TFT LCD panel - -Required properties: -- compatible: should be "samsung,lsn122dl01-c01" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/samsung,ltn101nt05.txt b/Documentation/devicetree/bindings/display/panel/samsung,ltn101nt05.txt deleted file mode 100644 index ef522c6bb85f..000000000000 --- a/Documentation/devicetree/bindings/display/panel/samsung,ltn101nt05.txt +++ /dev/null @@ -1,7 +0,0 @@ -Samsung Electronics 10.1" WSVGA TFT LCD panel - -Required properties: -- compatible: should be "samsung,ltn101nt05" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/samsung,ltn140at29-301.txt b/Documentation/devicetree/bindings/display/panel/samsung,ltn140at29-301.txt deleted file mode 100644 index e7f969d891cc..000000000000 --- a/Documentation/devicetree/bindings/display/panel/samsung,ltn140at29-301.txt +++ /dev/null @@ -1,7 +0,0 @@ -Samsung Electronics 14" WXGA (1366x768) TFT LCD panel - -Required properties: -- compatible: should be "samsung,ltn140at29-301" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/sharp,lq035q7db03.txt b/Documentation/devicetree/bindings/display/panel/sharp,lq035q7db03.txt deleted file mode 100644 index 0753f6967279..000000000000 --- a/Documentation/devicetree/bindings/display/panel/sharp,lq035q7db03.txt +++ /dev/null @@ -1,12 +0,0 @@ -Sharp LQ035Q7DB03 3.5" QVGA TFT LCD panel - -Required properties: -- compatible: should be "sharp,lq035q7db03" -- power-supply: phandle of the regulator that provides the supply voltage - -Optional properties: -- enable-gpios: GPIO pin to enable or disable the panel -- backlight: phandle of the backlight device attached to the panel - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/sharp,lq070y3dg3b.txt b/Documentation/devicetree/bindings/display/panel/sharp,lq070y3dg3b.txt deleted file mode 100644 index 95534b55ee5f..000000000000 --- a/Documentation/devicetree/bindings/display/panel/sharp,lq070y3dg3b.txt +++ /dev/null @@ -1,12 +0,0 @@ -Sharp LQ070Y3DG3B 7.0" WVGA landscape TFT LCD panel - -Required properties: -- compatible: should be "sharp,lq070y3dg3b" - -Optional properties: -- enable-gpios: GPIO pin to enable or disable the panel -- backlight: phandle of the backlight device attached to the panel -- power-supply: phandle of the regulator that provides the supply voltage - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/sharp,lq101k1ly04.txt b/Documentation/devicetree/bindings/display/panel/sharp,lq101k1ly04.txt deleted file mode 100644 index 4aff25b8dfe6..000000000000 --- a/Documentation/devicetree/bindings/display/panel/sharp,lq101k1ly04.txt +++ /dev/null @@ -1,7 +0,0 @@ -Sharp Display Corp. LQ101K1LY04 10.07" WXGA TFT LCD panel - -Required properties: -- compatible: should be "sharp,lq101k1ly04" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/sharp,lq123p1jx31.txt b/Documentation/devicetree/bindings/display/panel/sharp,lq123p1jx31.txt deleted file mode 100644 index bcb0e8a29f71..000000000000 --- a/Documentation/devicetree/bindings/display/panel/sharp,lq123p1jx31.txt +++ /dev/null @@ -1,7 +0,0 @@ -Sharp 12.3" (2400x1600 pixels) TFT LCD panel - -Required properties: -- compatible: should be "sharp,lq123p1jx31" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/shelly,sca07010-bfn-lnn.txt b/Documentation/devicetree/bindings/display/panel/shelly,sca07010-bfn-lnn.txt deleted file mode 100644 index fc1ea9e26c94..000000000000 --- a/Documentation/devicetree/bindings/display/panel/shelly,sca07010-bfn-lnn.txt +++ /dev/null @@ -1,7 +0,0 @@ -Shelly SCA07010-BFN-LNN 7.0" WVGA TFT LCD panel - -Required properties: -- compatible: should be "shelly,sca07010-bfn-lnn" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/starry,kr122ea0sra.txt b/Documentation/devicetree/bindings/display/panel/starry,kr122ea0sra.txt deleted file mode 100644 index 1e87fe6078af..000000000000 --- a/Documentation/devicetree/bindings/display/panel/starry,kr122ea0sra.txt +++ /dev/null @@ -1,7 +0,0 @@ -Starry 12.2" (1920x1200 pixels) TFT LCD panel - -Required properties: -- compatible: should be "starry,kr122ea0sra" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/tianma,tm070jdhg30.txt b/Documentation/devicetree/bindings/display/panel/tianma,tm070jdhg30.txt deleted file mode 100644 index eb9501a82e25..000000000000 --- a/Documentation/devicetree/bindings/display/panel/tianma,tm070jdhg30.txt +++ /dev/null @@ -1,7 +0,0 @@ -Tianma Micro-electronics TM070JDHG30 7.0" WXGA TFT LCD panel - -Required properties: -- compatible: should be "tianma,tm070jdhg30" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/tianma,tm070rvhg71.txt b/Documentation/devicetree/bindings/display/panel/tianma,tm070rvhg71.txt deleted file mode 100644 index b25261e63a6d..000000000000 --- a/Documentation/devicetree/bindings/display/panel/tianma,tm070rvhg71.txt +++ /dev/null @@ -1,29 +0,0 @@ -Tianma Micro-electronics TM070RVHG71 7.0" WXGA TFT LCD panel - -Required properties: -- compatible: should be "tianma,tm070rvhg71" -- power-supply: single regulator to provide the supply voltage -- backlight: phandle of the backlight device attached to the panel - -Required nodes: -- port: LVDS port mapping to connect this display - -This panel needs single power supply voltage. Its backlight is conntrolled -via PWM signal. - -Example: --------- - -Example device-tree definition when connected to iMX6Q based board - - panel: panel-lvds0 { - compatible = "tianma,tm070rvhg71"; - backlight = <&backlight_lvds>; - power-supply = <®_lvds>; - - port { - panel_in_lvds0: endpoint { - remote-endpoint = <&lvds0_out>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/display/panel/toshiba,lt089ac29000.txt b/Documentation/devicetree/bindings/display/panel/toshiba,lt089ac29000.txt deleted file mode 100644 index 89826116628c..000000000000 --- a/Documentation/devicetree/bindings/display/panel/toshiba,lt089ac29000.txt +++ /dev/null @@ -1,8 +0,0 @@ -Toshiba 8.9" WXGA (1280x768) TFT LCD panel - -Required properties: -- compatible: should be "toshiba,lt089ac29000" -- power-supply: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/tpk,f07a-0102.txt b/Documentation/devicetree/bindings/display/panel/tpk,f07a-0102.txt deleted file mode 100644 index a2613b9675df..000000000000 --- a/Documentation/devicetree/bindings/display/panel/tpk,f07a-0102.txt +++ /dev/null @@ -1,8 +0,0 @@ -TPK U.S.A. LLC Fusion 7" integrated projected capacitive touch display with, -800 x 480 (WVGA) LCD panel. - -Required properties: -- compatible: should be "tpk,f07a-0102" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/tpk,f10a-0102.txt b/Documentation/devicetree/bindings/display/panel/tpk,f10a-0102.txt deleted file mode 100644 index b9d051196ba9..000000000000 --- a/Documentation/devicetree/bindings/display/panel/tpk,f10a-0102.txt +++ /dev/null @@ -1,8 +0,0 @@ -TPK U.S.A. LLC Fusion 10.1" integrated projected capacitive touch display with, -1024 x 600 (WSVGA) LCD panel. - -Required properties: -- compatible: should be "tpk,f10a-0102" - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/urt,umsh-8596md.txt b/Documentation/devicetree/bindings/display/panel/urt,umsh-8596md.txt deleted file mode 100644 index 088a6cea5015..000000000000 --- a/Documentation/devicetree/bindings/display/panel/urt,umsh-8596md.txt +++ /dev/null @@ -1,16 +0,0 @@ -United Radiant Technology UMSH-8596MD-xT 7.0" WVGA TFT LCD panel - -Supported are LVDS versions (-11T, -19T) and parallel ones -(-T, -1T, -7T, -20T). - -Required properties: -- compatible: should be one of: - "urt,umsh-8596md-t", - "urt,umsh-8596md-1t", - "urt,umsh-8596md-7t", - "urt,umsh-8596md-11t", - "urt,umsh-8596md-19t", - "urt,umsh-8596md-20t". - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/vl050_8048nt_c01.txt b/Documentation/devicetree/bindings/display/panel/vl050_8048nt_c01.txt deleted file mode 100644 index b42bf06bbd99..000000000000 --- a/Documentation/devicetree/bindings/display/panel/vl050_8048nt_c01.txt +++ /dev/null @@ -1,12 +0,0 @@ -VXT 800x480 color TFT LCD panel - -Required properties: -- compatible: should be "vxt,vl050-8048nt-c01" -- power-supply: as specified in the base binding - -Optional properties: -- backlight: as specified in the base binding -- enable-gpios: as specified in the base binding - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/winstar,wf35ltiacd.txt b/Documentation/devicetree/bindings/display/panel/winstar,wf35ltiacd.txt deleted file mode 100644 index 2a7e6e3ba64c..000000000000 --- a/Documentation/devicetree/bindings/display/panel/winstar,wf35ltiacd.txt +++ /dev/null @@ -1,48 +0,0 @@ -Winstar Display Corporation 3.5" QVGA (320x240) TFT LCD panel - -Required properties: -- compatible: should be "winstar,wf35ltiacd" -- power-supply: regulator to provide the VCC supply voltage (3.3 volts) - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. - -Example: - backlight: backlight { - compatible = "pwm-backlight"; - pwms = <&hlcdc_pwm 0 50000 PWM_POLARITY_INVERTED>; - brightness-levels = <0 31 63 95 127 159 191 223 255>; - default-brightness-level = <191>; - power-supply = <&bl_reg>; - }; - - bl_reg: backlight_regulator { - compatible = "regulator-fixed"; - regulator-name = "backlight-power-supply"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - }; - - panel: panel { - compatible = "winstar,wf35ltiacd", "simple-panel"; - backlight = <&backlight>; - power-supply = <&panel_reg>; - #address-cells = <1>; - #size-cells = <0>; - - port { - #address-cells = <1>; - #size-cells = <0>; - - panel_input: endpoint { - remote-endpoint = <&hlcdc_panel_output>; - }; - }; - }; - - panel_reg: panel_regulator { - compatible = "regulator-fixed"; - regulator-name = "panel-power-supply"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; - }; -- cgit v1.2.3 From def6e7f13e0b4f28f1d9da0ab65f2bff44c715b0 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Mon, 20 Jan 2020 09:00:58 +0100 Subject: dt-bindings: add vendor prefix for logic technologies limited Add vendor prefix for Logic Technologies Limited [1] which is a Chinese display manufacturer e.g. distributed by German Endrich Bauelemente Vertriebs GmbH [2]. [1] https://logictechno.com/contact-us/ [2] https://www.endrich.com/isi50_isi30_tft-displays/lt170410-1whc_isi30 Signed-off-by: Marcel Ziswiler Reviewed-by: Philippe Schenker Acked-by: Rob Herring Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200120080100.170294-1-marcel@ziswiler.com --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index f9b84f24a382..ac4804d0a991 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -549,6 +549,8 @@ patternProperties: description: Linear Technology Corporation "^logicpd,.*": description: Logic PD, Inc. + "^logictechno,.*": + description: Logic Technologies Limited "^longcheer,.*": description: Longcheer Technology (Shanghai) Co., Ltd. "^lsi,.*": -- cgit v1.2.3 From 300fc577d64ec06fe2707b284bf8dac22d371066 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Mon, 20 Jan 2020 09:00:59 +0100 Subject: dt-bindings: panel-simple: add bindings for logic technologies displays Add bindings for the following 3 to be added display panels manufactured by Logic Technologies Limited: - LT161010-2NHC e.g. as found in the Toradex Capacitive Touch Display 7" Parallel [1] - LT161010-2NHR e.g. as found in the Toradex Resistive Touch Display 7" Parallel [2] - LT170410-2WHC e.g. as found in the Toradex Capacitive Touch Display 10.1" LVDS [3] Those panels may also be distributed by Endrich Bauelemente Vertriebs GmbH [4]. [1] https://docs.toradex.com/104497-7-inch-parallel-capacitive-touch-display-800x480-datasheet.pdf [2] https://docs.toradex.com/104498-7-inch-parallel-resistive-touch-display-800x480.pdf [3] https://docs.toradex.com/105952-10-1-inch-lvds-capacitive-touch-display-1280x800-datasheet.pdf [4] https://www.endrich.com/isi50_isi30_tft-displays/lt170410-1whc_isi30 Signed-off-by: Marcel Ziswiler Reviewed-by: Oleksandr Suvorov Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200120080100.170294-2-marcel@ziswiler.com --- Documentation/devicetree/bindings/display/panel/panel-simple.yaml | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index 4802ec18b927..bdf4d0bf4f3f 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -161,6 +161,12 @@ properties: - lg,lp120up1 # LG 12.9" (2560x1700 pixels) TFT LCD panel - lg,lp129qe + # Logic Technologies LT161010-2NHC 7" WVGA TFT Cap Touch Module + - logictechno,lt161010-2nhc + # Logic Technologies LT161010-2NHR 7" WVGA TFT Resistive Touch Module + - logictechno,lt161010-2nhr + # Logic Technologies LT170410-2WHC 10.1" 1280x800 IPS TFT Cap Touch Mod. + - logictechno,lt170410-2whc # Mitsubishi "AA070MC01 7.0" WVGA TFT LCD panel - mitsubishi,aa070mc01-ca1 # NEC LCD Technologies, Ltd. 12.1" WXGA (1280x800) LVDS TFT LCD panel -- cgit v1.2.3 From c2d4290ba0fff0aad5bc491af18adfbc88bf1c8c Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Mon, 20 Jan 2020 20:02:49 +0100 Subject: dt-bindings: restrict properties for sitronix,st7735r David Lechner noticed (paraphrased): - not all properties from panel-common are applicable. - missing optional rotation and backlight properties Fix this by listing all allowed properties, and do not allow other properties. Fixes: abdd9e3705c8 ("dt-bindings: display: sitronix,st7735r: Convert to DT schema") Reported-by: David Lechner Signed-off-by: Sam Ravnborg Reviewed-by: Geert Uytterhoeven Acked-by: Rob Herring Cc: David Lechner Cc: dri-devel@lists.freedesktop.org Link: https://patchwork.freedesktop.org/patch/msgid/20200120190249.GA9619@ravnborg.org --- Documentation/devicetree/bindings/display/sitronix,st7735r.yaml | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml index 8892d79e6e10..0cebaaefda03 100644 --- a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml +++ b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml @@ -39,12 +39,19 @@ properties: maxItems: 1 description: Display data/command selection (D/CX) + backlight: true + reg: true + reset-gpios: true + rotation: true + required: - compatible - reg - dc-gpios - reset-gpios +additionalProperties: false + examples: - | #include -- cgit v1.2.3 From 9ad676e5fd45a18227a52091038a4f47b350cf4d Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Tue, 19 Nov 2019 11:53:18 +0200 Subject: dt-bindings: display: ti,k2g-dss: Add dt-schema yaml binding Add dt-schema yaml bindig for K2G DSS, an ultra-light version of TI Keystone Display SubSystem. Version history: v2: no change v3: - Add ports node - Add includes to dts example - reindent dts example v4: - Add descriptions to reg and clocks properties - Remove minItems when its value is the same as maxItems value - Remove ports node v5: - itemize reg and clocks properties' descriptions v6: - Add Reviewed-by: from Rob Herring and Benoit Parrot v7: no change v8: no change v9: - Remove ports-node from the dts example Signed-off-by: Jyri Sarha Reviewed-by: Rob Herring Reviewed-by: Benoit Parrot Link: https://patchwork.freedesktop.org/patch/msgid/270297321f0768c10e241d289e3ac10e39cf12a9.1580129724.git.jsarha@ti.com --- .../devicetree/bindings/display/ti/ti,k2g-dss.yaml | 106 +++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/ti/ti,k2g-dss.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/ti/ti,k2g-dss.yaml b/Documentation/devicetree/bindings/display/ti/ti,k2g-dss.yaml new file mode 100644 index 000000000000..385bd060ccf9 --- /dev/null +++ b/Documentation/devicetree/bindings/display/ti/ti,k2g-dss.yaml @@ -0,0 +1,106 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +# Copyright 2019 Texas Instruments Incorporated +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/display/ti/ti,k2g-dss.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Texas Instruments K2G Display Subsystem + +maintainers: + - Jyri Sarha + - Tomi Valkeinen + +description: | + The K2G DSS is an ultra-light version of TI Keystone Display + SubSystem. It has only one output port and video plane. The + output is DPI. + +properties: + compatible: + const: ti,k2g-dss + + reg: + items: + - description: cfg DSS top level + - description: common DISPC common + - description: VID1 video plane 1 + - description: OVR1 overlay manager for vp1 + - description: VP1 video port 1 + + reg-names: + items: + - const: cfg + - const: common + - const: vid1 + - const: ovr1 + - const: vp1 + + clocks: + items: + - description: fck DSS functional clock + - description: vp1 Video Port 1 pixel clock + + clock-names: + items: + - const: fck + - const: vp1 + + interrupts: + maxItems: 1 + + power-domains: + maxItems: 1 + description: phandle to the associated power domain + + port: + type: object + description: + Port as described in Documentation/devictree/bindings/graph.txt. + The DSS DPI output port node + + max-memory-bandwidth: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Input memory (from main memory to dispc) bandwidth limit in + bytes per second + +required: + - compatible + - reg + - reg-names + - clocks + - clock-names + - interrupts + - port + +additionalProperties: false + +examples: + - | + #include + #include + + dss: dss@02540000 { + compatible = "ti,k2g-dss"; + reg = <0x02540000 0x400>, + <0x02550000 0x1000>, + <0x02557000 0x1000>, + <0x0255a800 0x100>, + <0x0255ac00 0x100>; + reg-names = "cfg", "common", "vid1", "ovr1", "vp1"; + clocks = <&k2g_clks 0x2 0>, + <&k2g_clks 0x2 1>; + clock-names = "fck", "vp1"; + interrupts = ; + + power-domains = <&k2g_pds 0x2>; + + max-memory-bandwidth = <230000000>; + + port { + dpi_out: endpoint { + remote-endpoint = <&sii9022_in>; + }; + }; + }; -- cgit v1.2.3 From 2d8730f1021f1378af8b1d03e8619a9de939e2d3 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Tue, 19 Nov 2019 19:28:30 +0200 Subject: dt-bindings: display: ti,am65x-dss: Add dt-schema yaml binding Add dt-schema yaml bindig for AM65x DSS, AM65x version TI Keystone Display SubSystem. Version history: v2: no change v3: - Add ports node - use allOf in ti,am65x-oldi-io-ctrl to add both $ref and maxItems - Add includes to dts example - reindent dts example v4: - Add descriptions to reg and clocks properties - Remove minItems when its value is the same as maxItems value v5: - itemize reg and clocks properties' descriptions v6: - Add Reviewed-by: from Rob Herring and Benoit Parrot v7: no change v8: no change Signed-off-by: Jyri Sarha Reviewed-by: Rob Herring Reviewed-by: Benoit Parrot Link: https://patchwork.freedesktop.org/patch/msgid/1799656fb8cc42ca698fc29bbc80c1bae442d6d3.1580129724.git.jsarha@ti.com --- .../bindings/display/ti/ti,am65x-dss.yaml | 152 +++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml new file mode 100644 index 000000000000..cac61a998203 --- /dev/null +++ b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml @@ -0,0 +1,152 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +# Copyright 2019 Texas Instruments Incorporated +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/display/ti/ti,am65x-dss.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Texas Instruments AM65x Display Subsystem + +maintainers: + - Jyri Sarha + - Tomi Valkeinen + +description: | + The AM65x TI Keystone Display SubSystem with two output ports and + two video planes. The first video port supports OLDI and the second + supports DPI format. The fist plane is full video plane with all + features and the second is a "lite plane" without scaling support. + +properties: + compatible: + const: ti,am65x-dss + + reg: + description: + Addresses to each DSS memory region described in the SoC's TRM. + items: + - description: common DSS register area + - description: VIDL1 light video plane + - description: VID video plane + - description: OVR1 overlay manager for vp1 + - description: OVR2 overlay manager for vp2 + - description: VP1 video port 1 + - description: VP2 video port 2 + + reg-names: + items: + - const: common + - const: vidl1 + - const: vid + - const: ovr1 + - const: ovr2 + - const: vp1 + - const: vp2 + + clocks: + items: + - description: fck DSS functional clock + - description: vp1 Video Port 1 pixel clock + - description: vp2 Video Port 2 pixel clock + + clock-names: + items: + - const: fck + - const: vp1 + - const: vp2 + + interrupts: + maxItems: 1 + + power-domains: + maxItems: 1 + description: phandle to the associated power domain + + ports: + type: object + description: + Ports as described in Documentation/devictree/bindings/graph.txt + properties: + "#address-cells": + const: 1 + + "#size-cells": + const: 0 + + port@0: + type: object + description: + The DSS OLDI output port node form video port 1 + + port@1: + type: object + description: + The DSS DPI output port node from video port 2 + + required: + - "#address-cells" + - "#size-cells" + + ti,am65x-oldi-io-ctrl: + allOf: + - $ref: "/schemas/types.yaml#/definitions/phandle-array" + - maxItems: 1 + description: + phandle to syscon device node mapping OLDI IO_CTRL registers. + The mapped range should point to OLDI_DAT0_IO_CTRL, map it and + following OLDI_DAT1_IO_CTRL, OLDI_DAT2_IO_CTRL, OLDI_DAT3_IO_CTRL, + and OLDI_CLK_IO_CTRL registers. This property is needed for OLDI + interface to work. + + max-memory-bandwidth: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Input memory (from main memory to dispc) bandwidth limit in + bytes per second + +required: + - compatible + - reg + - reg-names + - clocks + - clock-names + - interrupts + - ports + +additionalProperties: false + +examples: + - | + #include + #include + #include + + dss: dss@04a00000 { + compatible = "ti,am65x-dss"; + reg = <0x0 0x04a00000 0x0 0x1000>, /* common */ + <0x0 0x04a02000 0x0 0x1000>, /* vidl1 */ + <0x0 0x04a06000 0x0 0x1000>, /* vid */ + <0x0 0x04a07000 0x0 0x1000>, /* ovr1 */ + <0x0 0x04a08000 0x0 0x1000>, /* ovr2 */ + <0x0 0x04a0a000 0x0 0x1000>, /* vp1 */ + <0x0 0x04a0b000 0x0 0x1000>; /* vp2 */ + reg-names = "common", "vidl1", "vid", + "ovr1", "ovr2", "vp1", "vp2"; + ti,am65x-oldi-io-ctrl = <&dss_oldi_io_ctrl>; + power-domains = <&k3_pds 67 TI_SCI_PD_EXCLUSIVE>; + clocks = <&k3_clks 67 1>, + <&k3_clks 216 1>, + <&k3_clks 67 2>; + clock-names = "fck", "vp1", "vp2"; + interrupts = ; + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + oldi_out0: endpoint { + remote-endpoint = <&lcd_in0>; + }; + }; + }; + }; -- cgit v1.2.3 From 6057317cb76c9a94ba3f00562598e4f77442133b Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Tue, 19 Nov 2019 19:32:37 +0200 Subject: dt-bindings: display: ti,j721e-dss: Add dt-schema yaml binding Add dt-schema yaml bindig for J721E DSS, J721E version TI Keystone Display SubSystem. Version history: v2: no change v3: - reg-names: "wp" -> "wb" - Add ports node - Add includes to dts example - reindent dts example v4: - Add descriptions to reg, clocks, and interrupts properties - Remove minItems when its value is the same as maxItems value v5: - itemize reg, clocks and interrupts properties' descriptions - there is no "vp" reg-name, only "wb" for write back v6: - Add Reviewed-by: from Rob Herring and Benoit Parrot v7: no change v8: no change v9: no change Signed-off-by: Jyri Sarha Reviewed-by: Rob Herring Reviewed-by: Benoit Parrot Link: https://patchwork.freedesktop.org/patch/msgid/fcba52837808853a4b2bf729bd33bd986cbdadf4.1580129724.git.jsarha@ti.com --- .../bindings/display/ti/ti,j721e-dss.yaml | 208 +++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml b/Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml new file mode 100644 index 000000000000..ade9b2f513f5 --- /dev/null +++ b/Documentation/devicetree/bindings/display/ti/ti,j721e-dss.yaml @@ -0,0 +1,208 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +# Copyright 2019 Texas Instruments Incorporated +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/display/ti/ti,j721e-dss.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Texas Instruments J721E Display Subsystem + +maintainers: + - Jyri Sarha + - Tomi Valkeinen + +description: | + The J721E TI Keystone Display SubSystem with four output ports and + four video planes. There is two full video planes and two "lite + planes" without scaling support. The video ports can be connected to + the SoC's DPI pins or to integrated display bridges on the SoC. + +properties: + compatible: + const: ti,j721e-dss + + reg: + items: + - description: common_m DSS Master common + - description: common_s0 DSS Shared common 0 + - description: common_s1 DSS Shared common 1 + - description: common_s2 DSS Shared common 2 + - description: VIDL1 light video plane 1 + - description: VIDL2 light video plane 2 + - description: VID1 video plane 1 + - description: VID1 video plane 2 + - description: OVR1 overlay manager for vp1 + - description: OVR2 overlay manager for vp2 + - description: OVR3 overlay manager for vp3 + - description: OVR4 overlay manager for vp4 + - description: VP1 video port 1 + - description: VP2 video port 2 + - description: VP3 video port 3 + - description: VP4 video port 4 + - description: WB Write Back + + reg-names: + items: + - const: common_m + - const: common_s0 + - const: common_s1 + - const: common_s2 + - const: vidl1 + - const: vidl2 + - const: vid1 + - const: vid2 + - const: ovr1 + - const: ovr2 + - const: ovr3 + - const: ovr4 + - const: vp1 + - const: vp2 + - const: vp3 + - const: vp4 + - const: wb + + clocks: + items: + - description: fck DSS functional clock + - description: vp1 Video Port 1 pixel clock + - description: vp2 Video Port 2 pixel clock + - description: vp3 Video Port 3 pixel clock + - description: vp4 Video Port 4 pixel clock + + clock-names: + items: + - const: fck + - const: vp1 + - const: vp2 + - const: vp3 + - const: vp4 + + interrupts: + items: + - description: common_m DSS Master common + - description: common_s0 DSS Shared common 0 + - description: common_s1 DSS Shared common 1 + - description: common_s2 DSS Shared common 2 + + interrupt-names: + items: + - const: common_m + - const: common_s0 + - const: common_s1 + - const: common_s2 + + power-domains: + maxItems: 1 + description: phandle to the associated power domain + + ports: + type: object + description: + Ports as described in Documentation/devictree/bindings/graph.txt + properties: + "#address-cells": + const: 1 + + "#size-cells": + const: 0 + + port@0: + type: object + description: + The output port node form video port 1 + + port@1: + type: object + description: + The output port node from video port 2 + + port@2: + type: object + description: + The output port node from video port 3 + + port@3: + type: object + description: + The output port node from video port 4 + + required: + - "#address-cells" + - "#size-cells" + + max-memory-bandwidth: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Input memory (from main memory to dispc) bandwidth limit in + bytes per second + +required: + - compatible + - reg + - reg-names + - clocks + - clock-names + - interrupts + - interrupt-names + - ports + +additionalProperties: false + +examples: + - | + #include + #include + #include + + dss: dss@04a00000 { + compatible = "ti,j721e-dss"; + reg = <0x00 0x04a00000 0x00 0x10000>, /* common_m */ + <0x00 0x04a10000 0x00 0x10000>, /* common_s0*/ + <0x00 0x04b00000 0x00 0x10000>, /* common_s1*/ + <0x00 0x04b10000 0x00 0x10000>, /* common_s2*/ + <0x00 0x04a20000 0x00 0x10000>, /* vidl1 */ + <0x00 0x04a30000 0x00 0x10000>, /* vidl2 */ + <0x00 0x04a50000 0x00 0x10000>, /* vid1 */ + <0x00 0x04a60000 0x00 0x10000>, /* vid2 */ + <0x00 0x04a70000 0x00 0x10000>, /* ovr1 */ + <0x00 0x04a90000 0x00 0x10000>, /* ovr2 */ + <0x00 0x04ab0000 0x00 0x10000>, /* ovr3 */ + <0x00 0x04ad0000 0x00 0x10000>, /* ovr4 */ + <0x00 0x04a80000 0x00 0x10000>, /* vp1 */ + <0x00 0x04aa0000 0x00 0x10000>, /* vp2 */ + <0x00 0x04ac0000 0x00 0x10000>, /* vp3 */ + <0x00 0x04ae0000 0x00 0x10000>, /* vp4 */ + <0x00 0x04af0000 0x00 0x10000>; /* wb */ + reg-names = "common_m", "common_s0", + "common_s1", "common_s2", + "vidl1", "vidl2","vid1","vid2", + "ovr1", "ovr2", "ovr3", "ovr4", + "vp1", "vp2", "vp3", "vp4", + "wb"; + clocks = <&k3_clks 152 0>, + <&k3_clks 152 1>, + <&k3_clks 152 4>, + <&k3_clks 152 9>, + <&k3_clks 152 13>; + clock-names = "fck", "vp1", "vp2", "vp3", "vp4"; + power-domains = <&k3_pds 152 TI_SCI_PD_EXCLUSIVE>; + interrupts = , + , + , + ; + interrupt-names = "common_m", + "common_s0", + "common_s1", + "common_s2"; + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + + dpi_out_0: endpoint { + remote-endpoint = <&dp_bridge_input>; + }; + }; + }; + }; -- cgit v1.2.3 From 9a69bd1912af86ce15a7c5dd6691518a64709ef4 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 13 Dec 2019 18:26:03 +0100 Subject: drm/todo: Add item for the plane->atomic_check confusion It's frankly a mess, and the confusion around plane_state->crtc/fb that I fixed up in this series is the least of the problems. Add a todo as a future note of how this could be done a lot better, and with a lot less driver confusion. Acked-by: Sam Ravnborg Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20191213172612.1514842-1-daniel.vetter@ffwll.ch --- Documentation/gpu/todo.rst | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'Documentation') diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index bc869b23fc39..370ac678106e 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -72,6 +72,28 @@ Contact: Ville Syrjälä, Daniel Vetter, driver maintainers Level: Advanced +Improve plane atomic_check helpers +---------------------------------- + +Aside from the clipped coordinates right above there's a few suboptimal things +with the current helpers: + +- drm_plane_helper_funcs->atomic_check gets called for enabled or disabled + planes. At best this seems to confuse drivers, worst it means they blow up + when the plane is disabled without the CRTC. The only special handling is + resetting values in the plane state structures, which instead should be moved + into the drm_plane_funcs->atomic_duplicate_state functions. + +- Once that's done, helpers could stop calling ->atomic_check for disabled + planes. + +- Then we could go through all the drivers and remove the more-or-less confused + checks for plane_state->fb and plane_state->crtc. + +Contact: Daniel Vetter + +Level: Advanced + Convert early atomic drivers to async commit helpers ---------------------------------------------------- -- cgit v1.2.3 From c752affbadb05b619247d462c9f7fa91b3adb6c3 Mon Sep 17 00:00:00 2001 From: Marian-Cristian Rotariu Date: Thu, 30 Jan 2020 12:08:37 +0000 Subject: dt-bindings: display: Add bindings for EDT panel Document the Emerging Display Technology Corp. (EDT) ETM043080DH6-GP display, which is a 480x272 4.3" TFT display with capacitive touchscreen. Changes in v2: -modify proper bindings file Signed-off-by: Marian-Cristian Rotariu Reviewed-by: Lad Prabhakar Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/1580386118-22895-2-git-send-email-marian-cristian.rotariu.rb@bp.renesas.com --- Documentation/devicetree/bindings/display/panel/panel-simple.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index bdf4d0bf4f3f..cf23b0ad077a 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -89,6 +89,8 @@ properties: - dlc,dlc1010gig # Emerging Display Technology Corp. 3.5" QVGA TFT LCD panel - edt,et035012dm6 + # Emerging Display Technology Corp. 480x272 TFT Display with capacitive touch + - edt,etm043080dh6gp # Emerging Display Technology Corp. 480x272 TFT Display - edt,etm0430g0dh6 # Emerging Display Technology Corp. 5.7" VGA TFT LCD panel -- cgit v1.2.3 From 1dff44e1b51e1baaf069667b2cafeaf93d1b6320 Mon Sep 17 00:00:00 2001 From: Michael Srba Date: Thu, 30 Jan 2020 21:35:54 +0100 Subject: dt-bindings: display/panel: add bindings for S6E88A0-AMS452EF01 This patch adds dts bindings for Samsung AMS452EF01 AMOLED panel, which makes use of their S6E88A0 controller. Signed-off-by: Michael Srba Signed-off-by: Sam Ravnborg [fixed syntax and updated example to fix warnings] Link: https://patchwork.freedesktop.org/patch/msgid/20200130203555.316-1-michael.srba@seznam.cz --- .../display/panel/samsung,s6e88a0-ams452ef01.yaml | 50 ++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/panel/samsung,s6e88a0-ams452ef01.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6e88a0-ams452ef01.yaml b/Documentation/devicetree/bindings/display/panel/samsung,s6e88a0-ams452ef01.yaml new file mode 100644 index 000000000000..7a685d0428b3 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/samsung,s6e88a0-ams452ef01.yaml @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/samsung,s6e88a0-ams452ef01.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Samsung AMS452EF01 AMOLED panel with S6E88A0 video mode DSI controller + +maintainers: + - Michael Srba + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: samsung,s6e88a0-ams452ef01 + reg: true + reset-gpios: true + vdd3-supply: + description: core voltage supply + vci-supply: + description: voltage supply for analog circuits + +required: + - compatible + - reg + - vdd3-supply + - vci-supply + - reset-gpios + +additionalProperties: false + +examples: + - | + #include + + dsi { + #address-cells = <1>; + #size-cells = <0>; + panel@0 { + reg = <0>; + + compatible = "samsung,s6e88a0-ams452ef01"; + + vdd3-supply = <&pm8916_l17>; + vci-supply = <®_vlcd_vci>; + reset-gpios = <&msmgpio 25 GPIO_ACTIVE_HIGH>; + }; + }; -- cgit v1.2.3 From a6c5729b0ae1164326c8899a67a40cbe4325e82e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 31 Jan 2020 09:28:33 +0100 Subject: openrisc: configs: Cleanup CONFIG_CROSS_COMPILE CONFIG_CROSS_COMPILE is gone since commit f1089c92da79 ("kbuild: remove CONFIG_CROSS_COMPILE support"). Signed-off-by: Krzysztof Kozlowski Signed-off-by: Stafford Horne --- Documentation/openrisc/openrisc_port.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/openrisc/openrisc_port.rst b/Documentation/openrisc/openrisc_port.rst index a18747a8d191..4b2c437942a0 100644 --- a/Documentation/openrisc/openrisc_port.rst +++ b/Documentation/openrisc/openrisc_port.rst @@ -37,8 +37,8 @@ or Stafford's toolchain build and release scripts. Build the Linux kernel as usual:: - make ARCH=openrisc defconfig - make ARCH=openrisc + make ARCH=openrisc CROSS_COMPILE="or1k-linux-" defconfig + make ARCH=openrisc CROSS_COMPILE="or1k-linux-" 3) Running on FPGA (optional) -- cgit v1.2.3 From 89bd6147964ea3d72708ee30ba309ac675a5b997 Mon Sep 17 00:00:00 2001 From: Venkata Lakshmi Narayana Gubba Date: Mon, 3 Feb 2020 16:10:41 +0530 Subject: dt-bindings: net: bluetooth: Add device tree bindings for QTI chip WCN3991 Add compatible string for the Qualcomm WCN3991 Bluetooth controller Signed-off-by: Venkata Lakshmi Narayana Gubba Signed-off-by: Marcel Holtmann --- Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt index 68b67d9db63a..beca6466d59a 100644 --- a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt +++ b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt @@ -11,6 +11,7 @@ Required properties: - compatible: should contain one of the following: * "qcom,qca6174-bt" * "qcom,wcn3990-bt" + * "qcom,wcn3991-bt" * "qcom,wcn3998-bt" Optional properties for compatible string qcom,qca6174-bt: @@ -30,6 +31,7 @@ Optional properties for compatible string qcom,wcn399x-bt: - max-speed: see Documentation/devicetree/bindings/serial/slave-device.txt - firmware-name: specify the name of nvm firmware to load + - clocks: clock provided to the controller Examples: @@ -56,5 +58,6 @@ serial@898000 { vddch0-supply = <&vreg_l25a_3p3>; max-speed = <3200000>; firmware-name = "crnv21.bin"; + clocks = <&rpmhcc RPMH_RF_CLK2>; }; }; -- cgit v1.2.3 From 79b9376404b02cba80198cb1294bcade155a965d Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Thu, 6 Feb 2020 14:33:42 +0100 Subject: dt-bindings: one file of all simple DSI panels To complement panel-simple.yaml, create panel-simple-dsi.yaml. panel-simple-dsi-yaml are for all simple DSP panels with a single power-supply and optional backlight / enable GPIO. Migrate panasonic,vvx10f034n00 over to the new file. The objectives with one file for all the simple DSI panels are: - Make it simpler to add bindings for simple DSI panels - Keep the number of bindings file lower - Keep the binding documentation for simple DSI panels more consistent Signed-off-by: Sam Ravnborg Signed-off-by: Benjamin Gaignard Reviewed-by: Rob Herring Reviewed-by: Philippe Cornu Signed-off-by: Sam Ravnborg Cc: Rob Herring Cc: Maxime Ripard Cc: Yannick Fertre Cc: Mark Rutland Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: devicetree@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200206133344.724-2-benjamin.gaignard@st.com --- .../display/panel/panasonic,vvx10f034n00.txt | 20 ------- .../bindings/display/panel/panel-simple-dsi.yaml | 67 ++++++++++++++++++++++ 2 files changed, 67 insertions(+), 20 deletions(-) delete mode 100644 Documentation/devicetree/bindings/display/panel/panasonic,vvx10f034n00.txt create mode 100644 Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/panasonic,vvx10f034n00.txt b/Documentation/devicetree/bindings/display/panel/panasonic,vvx10f034n00.txt deleted file mode 100644 index 37dedf6a6702..000000000000 --- a/Documentation/devicetree/bindings/display/panel/panasonic,vvx10f034n00.txt +++ /dev/null @@ -1,20 +0,0 @@ -Panasonic 10" WUXGA TFT LCD panel - -Required properties: -- compatible: should be "panasonic,vvx10f034n00" -- reg: DSI virtual channel of the peripheral -- power-supply: phandle of the regulator that provides the supply voltage - -Optional properties: -- backlight: phandle of the backlight device attached to the panel - -Example: - - mdss_dsi@fd922800 { - panel@0 { - compatible = "panasonic,vvx10f034n00"; - reg = <0>; - power-supply = <&vreg_vsp>; - backlight = <&lp8566_wled>; - }; - }; diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml new file mode 100644 index 000000000000..8b60368a2425 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/panel-simple-dsi.yaml @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: (GPL-2.0-only or BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/panel-simple-dsi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Simple DSI panels with a single power-supply + +maintainers: + - Thierry Reding + - Sam Ravnborg + +description: | + This binding file is a collection of the DSI panels that + requires only a single power-supply. + There are optionally a backlight and an enable GPIO. + The panel may use an OF graph binding for the association to the display, + or it may be a direct child node of the display. + + If the panel is more advanced a dedicated binding file is required. + +allOf: + - $ref: panel-common.yaml# + +properties: + + compatible: + enum: + # compatible must be listed in alphabetical order, ordered by compatible. + # The description in the comment is mandatory for each compatible. + + # Panasonic 10" WUXGA TFT LCD panel + - panasonic,vvx10f034n00 + + reg: + maxItems: 1 + description: DSI virtual channel + + backlight: true + enable-gpios: true + port: true + power-supply: true + +additionalProperties: false + +required: + - compatible + - power-supply + - reg + +examples: + - | + mdss_dsi@fd922800 { + #address-cells = <1>; + #size-cells = <0>; + panel@0 { + compatible = "panasonic,vvx10f034n00"; + reg = <0>; + power-supply = <&vcc_lcd_reg>; + + port { + panel: endpoint { + remote-endpoint = <<dc_out>; + }; + }; + }; + }; -- cgit v1.2.3 From 1689578c5aa643fb11839da78191af827f08c382 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Thu, 6 Feb 2020 14:33:43 +0100 Subject: dt-bindings: panel: Convert raydium,rm68200 to json-schema Convert raydium,rm68200 to json-schema. Signed-off-by: Benjamin Gaignard Reviewed-by: Rob Herring Reviewed-by: Philippe Cornu Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200206133344.724-3-benjamin.gaignard@st.com --- .../bindings/display/panel/raydium,rm68200.txt | 25 ---------- .../bindings/display/panel/raydium,rm68200.yaml | 56 ++++++++++++++++++++++ 2 files changed, 56 insertions(+), 25 deletions(-) delete mode 100644 Documentation/devicetree/bindings/display/panel/raydium,rm68200.txt create mode 100644 Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/raydium,rm68200.txt b/Documentation/devicetree/bindings/display/panel/raydium,rm68200.txt deleted file mode 100644 index cbb79ef3bfc9..000000000000 --- a/Documentation/devicetree/bindings/display/panel/raydium,rm68200.txt +++ /dev/null @@ -1,25 +0,0 @@ -Raydium Semiconductor Corporation RM68200 5.5" 720p MIPI-DSI TFT LCD panel - -The Raydium Semiconductor Corporation RM68200 is a 5.5" 720x1280 TFT LCD -panel connected using a MIPI-DSI video interface. - -Required properties: - - compatible: "raydium,rm68200" - - reg: the virtual channel number of a DSI peripheral - -Optional properties: - - reset-gpios: a GPIO spec for the reset pin (active low). - - power-supply: phandle of the regulator that provides the supply voltage. - - backlight: phandle of the backlight device attached to the panel. - -Example: -&dsi { - ... - panel@0 { - compatible = "raydium,rm68200"; - reg = <0>; - reset-gpios = <&gpiof 15 GPIO_ACTIVE_LOW>; - power-supply = <&v1v8>; - backlight = <&pwm_backlight>; - }; -}; diff --git a/Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml b/Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml new file mode 100644 index 000000000000..09149f140d5f --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: (GPL-2.0-only or BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/raydium,rm68200.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Raydium Semiconductor Corporation RM68200 5.5" 720p MIPI-DSI TFT LCD panel + +maintainers: + - Philippe CORNU + +description: | + The Raydium Semiconductor Corporation RM68200 is a 5.5" 720x1280 TFT LCD + panel connected using a MIPI-DSI video interface. + +allOf: + - $ref: panel-common.yaml# + +properties: + + compatible: + const: raydium,rm68200 + + reg: + maxItems: 1 + description: DSI virtual channel + + backlight: true + enable-gpios: true + port: true + power-supply: true + + reset-gpios: + maxItems: 1 + +additionalProperties: false + +required: + - compatible + - power-supply + - reg + +examples: + - | + dsi@0 { + #address-cells = <1>; + #size-cells = <0>; + panel@0 { + compatible = "raydium,rm68200"; + reg = <0>; + reset-gpios = <&gpiof 15 0>; + power-supply = <&v1v8>; + backlight = <&pwm_backlight>; + }; + }; +... -- cgit v1.2.3 From c55d0a554843f2c4dfb44d0c8a99a1670c32c33d Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Thu, 6 Feb 2020 14:33:44 +0100 Subject: dt-bindings: panel: Convert orisetech,otm8009a to json-schema Convert orisetech,otm8009a to json-schema. Signed-off-by: Benjamin Gaignard Reviewed-by: Rob Herring Reviewed-by: Philippe Cornu Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200206133344.724-4-benjamin.gaignard@st.com --- .../bindings/display/panel/orisetech,otm8009a.txt | 23 ---------- .../bindings/display/panel/orisetech,otm8009a.yaml | 53 ++++++++++++++++++++++ 2 files changed, 53 insertions(+), 23 deletions(-) delete mode 100644 Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.txt create mode 100644 Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.txt b/Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.txt deleted file mode 100644 index 203b03eefb68..000000000000 --- a/Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.txt +++ /dev/null @@ -1,23 +0,0 @@ -Orise Tech OTM8009A 3.97" 480x800 TFT LCD panel (MIPI-DSI video mode) - -The Orise Tech OTM8009A is a 3.97" 480x800 TFT LCD panel connected using -a MIPI-DSI video interface. Its backlight is managed through the DSI link. - -Required properties: - - compatible: "orisetech,otm8009a" - - reg: the virtual channel number of a DSI peripheral - -Optional properties: - - reset-gpios: a GPIO spec for the reset pin (active low). - - power-supply: phandle of the regulator that provides the supply voltage. - -Example: -&dsi { - ... - panel@0 { - compatible = "orisetech,otm8009a"; - reg = <0>; - reset-gpios = <&gpioh 7 GPIO_ACTIVE_LOW>; - power-supply = <&v1v8>; - }; -}; diff --git a/Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.yaml b/Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.yaml new file mode 100644 index 000000000000..6e6ac995c27b --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.yaml @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: (GPL-2.0-only or BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/orisetech,otm8009a.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Orise Tech OTM8009A 3.97" 480x800 TFT LCD panel (MIPI-DSI video mode) + +maintainers: + - Philippe CORNU + +description: | + The Orise Tech OTM8009A is a 3.97" 480x800 TFT LCD panel connected using + a MIPI-DSI video interface. Its backlight is managed through the DSI link. +allOf: + - $ref: panel-common.yaml# + +properties: + + compatible: + const: orisetech,otm8009a + + reg: + maxItems: 1 + description: DSI virtual channel + + enable-gpios: true + port: true + power-supply: true + + reset-gpios: + maxItems: 1 + +additionalProperties: false + +required: + - compatible + - reg + +examples: + - | + dsi@0 { + #address-cells = <1>; + #size-cells = <0>; + panel@0 { + compatible = "orisetech,otm8009a"; + reg = <0>; + reset-gpios = <&gpiof 15 0>; + power-supply = <&v1v8>; + }; + }; +... + -- cgit v1.2.3 From a72e1f684d9b794a80bdbac0b066abf8e80738e0 Mon Sep 17 00:00:00 2001 From: Kamlesh Gurudasani Date: Sun, 9 Feb 2020 23:36:26 +0530 Subject: dt-bindings: add binding for tft displays based on ilitek,ili9486 This binding is for the tft displays based on ilitek,ili9486. ozzmaker,piscreen and waveshare,rpi-lcd-35 are such displays Signed-off-by: Kamlesh Gurudasani Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/8938c7a53598db793bbcddf205dcf35bcc19a18e.1581270802.git.kamlesh.gurudasani@gmail.com --- .../bindings/display/ilitek,ili9486.yaml | 73 ++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/ilitek,ili9486.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml b/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml new file mode 100644 index 000000000000..66e93e563653 --- /dev/null +++ b/Documentation/devicetree/bindings/display/ilitek,ili9486.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/ilitek,ili9486.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Ilitek ILI9486 display panels device tree bindings + +maintainers: + - Kamlesh Gurudasani + +description: + This binding is for display panels using an Ilitek ILI9486 controller in SPI + mode. + +allOf: + - $ref: panel/panel-common.yaml# + +properties: + compatible: + items: + - enum: + # Waveshare 3.5" 320x480 Color TFT LCD + - waveshare,rpi-lcd-35 + # Ozzmaker 3.5" 320x480 Color TFT LCD + - ozzmaker,piscreen + - const: ilitek,ili9486 + + spi-max-frequency: + maximum: 32000000 + + dc-gpios: + maxItems: 1 + description: Display data/command selection (D/CX) + + backlight: true + reg: true + reset-gpios: true + rotation: true + +required: + - compatible + - reg + - dc-gpios + - reset-gpios + +additionalProperties: false + +examples: + - | + #include + + backlight: backlight { + compatible = "gpio-backlight"; + gpios = <&gpio 22 GPIO_ACTIVE_HIGH>; + }; + spi { + #address-cells = <1>; + #size-cells = <0>; + + + display@0{ + compatible = "waveshare,rpi-lcd-35", "ilitek,ili9486"; + reg = <0>; + spi-max-frequency = <32000000>; + dc-gpios = <&gpio0 24 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio0 25 GPIO_ACTIVE_HIGH>; + rotation = <180>; + backlight = <&backlight>; + }; + }; + +... -- cgit v1.2.3 From e2c9e67e44fedcf37b383d3cacc525674bed457f Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 16 Jan 2020 23:11:48 +0000 Subject: dt-bindings: spi: sunxi: Document new compatible strings The Allwinner H6 SPI controller has advanced features over the H3 version, but remains compatible with it. Document the usual "specific", "fallback" compatible string pair. Also add the R40 version while at it. Signed-off-by: Andre Przywara Signed-off-by: Maxime Ripard --- .../devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml index 0565dc49e449..243a6b1e66ea 100644 --- a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml +++ b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml @@ -18,9 +18,14 @@ properties: "#size-cells": true compatible: - enum: - - allwinner,sun6i-a31-spi - - allwinner,sun8i-h3-spi + oneOf: + - const: allwinner,sun6i-a31-spi + - const: allwinner,sun8i-h3-spi + - items: + - enum: + - allwinner,sun8i-r40-spi + - allwinner,sun50i-h6-spi + - const: allwinner,sun8i-h3-spi reg: maxItems: 1 -- cgit v1.2.3 From 39b6343d1d4180b83edec31e1d0b36fe1ccfaf77 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 16 Jan 2020 11:36:35 +0800 Subject: dt-bindings: arm: sunxi: add binding for PineTab tablet Add the device tree binding for Pine64's PineTab tablet, which uses Allwinner A64 SoC. Signed-off-by: Icenowy Zheng Reviewed-by: Rob Herring Signed-off-by: Maxime Ripard --- Documentation/devicetree/bindings/arm/sunxi.yaml | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml index 327ce6730823..159060b65c5d 100644 --- a/Documentation/devicetree/bindings/arm/sunxi.yaml +++ b/Documentation/devicetree/bindings/arm/sunxi.yaml @@ -636,6 +636,11 @@ properties: - const: pine64,pinebook - const: allwinner,sun50i-a64 + - description: Pine64 PineTab + items: + - const: pine64,pinetab + - const: allwinner,sun50i-a64 + - description: Pine64 SoPine Baseboard items: - const: pine64,sopine-baseboard -- cgit v1.2.3 From 2f789d237c28e4026ee71c0402036fab3a71ce37 Mon Sep 17 00:00:00 2001 From: Bogdan Togorean Date: Tue, 21 Jan 2020 10:27:24 +0200 Subject: dt-bindings: drm: bridge: adv7511: Add ADV7535 support ADV7535 is a part compatible with ADV7533 but it supports 1080p@60hz and v1p2 supply is fixed to 1.8V Signed-off-by: Bogdan Togorean Reviewed-by: Laurent Pinchart Reviewed-by: Andrzej Hajda Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20200121082719.27972-4-bogdan.togorean@analog.com --- .../bindings/display/bridge/adi,adv7511.txt | 23 +++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt b/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt index 2c887536258c..e8ddec5d9d91 100644 --- a/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt +++ b/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt @@ -1,10 +1,10 @@ -Analog Device ADV7511(W)/13/33 HDMI Encoders +Analog Device ADV7511(W)/13/33/35 HDMI Encoders ----------------------------------------- -The ADV7511, ADV7511W, ADV7513 and ADV7533 are HDMI audio and video transmitters -compatible with HDMI 1.4 and DVI 1.0. They support color space conversion, -S/PDIF, CEC and HDCP. ADV7533 supports the DSI interface for input pixels, while -the others support RGB interface. +The ADV7511, ADV7511W, ADV7513, ADV7533 and ADV7535 are HDMI audio and video +transmitters compatible with HDMI 1.4 and DVI 1.0. They support color space +conversion, S/PDIF, CEC and HDCP. ADV7533/5 supports the DSI interface for input +pixels, while the others support RGB interface. Required properties: @@ -13,6 +13,7 @@ Required properties: "adi,adv7511w" "adi,adv7513" "adi,adv7533" + "adi,adv7535" - reg: I2C slave addresses The ADV7511 internal registers are split into four pages exposed through @@ -52,14 +53,14 @@ The following input format properties are required except in "rgb 1x" and - bgvdd-supply: A 1.8V supply that powers up the BGVDD pin. This is needed only for ADV7511. -The following properties are required for ADV7533: +The following properties are required for ADV7533 and ADV7535: - adi,dsi-lanes: Number of DSI data lanes connected to the DSI host. It should be one of 1, 2, 3 or 4. - a2vdd-supply: 1.8V supply that powers up the A2VDD pin on the chip. - v3p3-supply: A 3.3V supply that powers up the V3P3 pin on the chip. - v1p2-supply: A supply that powers up the V1P2 pin on the chip. It can be - either 1.2V or 1.8V. + either 1.2V or 1.8V for ADV7533 but only 1.8V for ADV7535. Optional properties: @@ -71,9 +72,9 @@ Optional properties: - adi,embedded-sync: The input uses synchronization signals embedded in the data stream (similar to BT.656). Defaults to separate H/V synchronization signals. -- adi,disable-timing-generator: Only for ADV7533. Disables the internal timing - generator. The chip will rely on the sync signals in the DSI data lanes, - rather than generate its own timings for HDMI output. +- adi,disable-timing-generator: Only for ADV7533 and ADV7535. Disables the + internal timing generator. The chip will rely on the sync signals in the + DSI data lanes, rather than generate its own timings for HDMI output. - clocks: from common clock binding: reference to the CEC clock. - clock-names: from common clock binding: must be "cec". - reg-names : Names of maps with programmable addresses. @@ -85,7 +86,7 @@ Required nodes: The ADV7511 has two video ports. Their connections are modelled using the OF graph bindings specified in Documentation/devicetree/bindings/graph.txt. -- Video port 0 for the RGB, YUV or DSI input. In the case of ADV7533, the +- Video port 0 for the RGB, YUV or DSI input. In the case of ADV7533/5, the remote endpoint phandle should be a reference to a valid mipi_dsi_host device node. - Video port 1 for the HDMI output -- cgit v1.2.3 From 45c415f6983eb2ad6607cb0de6bc84357ac78905 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 31 Jan 2020 13:15:52 +0200 Subject: dt-bindings: display: bridge: Add documentation for Toshiba tc358768 TC358768/TC358778 is a Parallel RGB to MIPI DSI bridge. Signed-off-by: Peter Ujfalusi Reviewed-by: Rob Herring Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20200131111553.472-2-peter.ujfalusi@ti.com --- .../bindings/display/bridge/toshiba,tc358768.yaml | 159 +++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml new file mode 100644 index 000000000000..c036a75db8f7 --- /dev/null +++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml @@ -0,0 +1,159 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/bridge/toshiba,tc358768.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Toschiba TC358768/TC358778 Parallel RGB to MIPI DSI bridge + +maintainers: + - Peter Ujfalusi + +description: | + The TC358768/TC358778 is bridge device which converts RGB to DSI. + +properties: + compatible: + enum: + - toshiba,tc358768 + - toshiba,tc358778 + + reg: + maxItems: 1 + description: base I2C address of the device + + reset-gpios: + maxItems: 1 + description: GPIO connected to active low RESX pin + + vddc-supply: + description: Regulator for 1.2V internal core power. + + vddmipi-supply: + description: Regulator for 1.2V for the MIPI. + + vddio-supply: + description: Regulator for 1.8V - 3.3V IO power. + + clocks: + maxItems: 1 + + clock-names: + const: refclk + + ports: + type: object + + properties: + "#address-cells": + const: 1 + + "#size-cells": + const: 0 + + port@0: + type: object + additionalProperties: false + + description: | + Video port for RGB input + + properties: + reg: + const: 0 + + patternProperties: + endpoint: + type: object + additionalProperties: false + + properties: + data-lines: + enum: [ 16, 18, 24 ] + + remote-endpoint: true + + required: + - reg + + port@1: + type: object + additionalProperties: false + + description: | + Video port for DSI output (panel or connector). + + properties: + reg: + const: 1 + + patternProperties: + endpoint: + type: object + additionalProperties: false + + properties: + remote-endpoint: true + + required: + - reg + + required: + - "#address-cells" + - "#size-cells" + - port@0 + - port@1 + +required: + - compatible + - reg + - vddc-supply + - vddmipi-supply + - vddio-supply + - ports + +additionalProperties: false + +examples: + - | + #include + + i2c1 { + #address-cells = <1>; + #size-cells = <0>; + + dsi_bridge: dsi-bridge@e { + compatible = "toshiba,tc358768"; + reg = <0xe>; + + clocks = <&tc358768_refclk>; + clock-names = "refclk"; + + reset-gpios = <&pcf_display_board 0 GPIO_ACTIVE_LOW>; + + vddc-supply = <&v1_2d>; + vddmipi-supply = <&v1_2d>; + vddio-supply = <&v3_3d>; + + dsi_bridge_ports: ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + rgb_in: endpoint { + remote-endpoint = <&dpi_out>; + data-lines = <24>; + }; + }; + + port@1 { + reg = <1>; + dsi_out: endpoint { + remote-endpoint = <&lcd_in>; + }; + }; + }; + }; + }; + -- cgit v1.2.3 From e9c38f9fc2ccd31befe1bb1605b69213483a15b7 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Wed, 8 Jan 2020 11:24:47 -0500 Subject: Documentation,selinux: deprecate setting checkreqprot to 1 Deprecate setting the SELinux checkreqprot tunable to 1 via kernel parameter or /sys/fs/selinux/checkreqprot. Setting it to 0 is left intact for compatibility since Android and some Linux distributions do so for security and treat an inability to set it as a fatal error. Eventually setting it to 0 will become a no-op and the kernel will stop using checkreqprot's value internally altogether. checkreqprot was originally introduced as a compatibility mechanism for legacy userspace and the READ_IMPLIES_EXEC personality flag. However, if set to 1, it weakens security by allowing mappings to be made executable without authorization by policy. The default value for the SECURITY_SELINUX_CHECKREQPROT_VALUE config option was changed from 1 to 0 in commit 2a35d196c160e3 ("selinux: change CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE default") and both Android and Linux distributions began explicitly setting /sys/fs/selinux/checkreqprot to 0 some time ago. Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- .../ABI/obsolete/sysfs-selinux-checkreqprot | 23 ++++++++++++++++++++++ Documentation/admin-guide/kernel-parameters.txt | 1 + 2 files changed, 24 insertions(+) create mode 100644 Documentation/ABI/obsolete/sysfs-selinux-checkreqprot (limited to 'Documentation') diff --git a/Documentation/ABI/obsolete/sysfs-selinux-checkreqprot b/Documentation/ABI/obsolete/sysfs-selinux-checkreqprot new file mode 100644 index 000000000000..49ed9c8fd1e5 --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-selinux-checkreqprot @@ -0,0 +1,23 @@ +What: /sys/fs/selinux/checkreqprot +Date: April 2005 (predates git) +KernelVersion: 2.6.12-rc2 (predates git) +Contact: selinux@vger.kernel.org +Description: + + The selinuxfs "checkreqprot" node allows SELinux to be configured + to check the protection requested by userspace for mmap/mprotect + calls instead of the actual protection applied by the kernel. + This was a compatibility mechanism for legacy userspace and + for the READ_IMPLIES_EXEC personality flag. However, if set to + 1, it weakens security by allowing mappings to be made executable + without authorization by policy. The default value of checkreqprot + at boot was changed starting in Linux v4.4 to 0 (i.e. check the + actual protection), and Android and Linux distributions have been + explicitly writing a "0" to /sys/fs/selinux/checkreqprot during + initialization for some time. Support for setting checkreqprot to 1 + will be removed in a future kernel release, at which point the kernel + will always cease using checkreqprot internally and will always + check the actual protections being applied upon mmap/mprotect calls. + The checkreqprot selinuxfs node will remain for backward compatibility + but will discard writes of the "0" value and will reject writes of the + "1" value when this mechanism is removed. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dbc22d684627..ff1428d69b2d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -518,6 +518,7 @@ Default value is set via a kernel config option. Value can be changed at runtime via /sys/fs/selinux/checkreqprot. + Setting checkreqprot to 1 is deprecated. cio_ignore= [S390] See Documentation/s390/common_io.rst for details. -- cgit v1.2.3 From f0df2e05a2d93e5feda871db0401399f5836abaa Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Mon, 10 Feb 2020 18:06:52 +0100 Subject: dt-bindings: interconnect: sunxi: Add A64 MBUS compatible A64 contains MBUS controller. Add a compatible for it. Acked-by: Rob Herring Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard --- .../devicetree/bindings/arm/sunxi/allwinner,sun4i-a10-mbus.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun4i-a10-mbus.yaml b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun4i-a10-mbus.yaml index 9370e64992dd..aa0738b4d534 100644 --- a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun4i-a10-mbus.yaml +++ b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun4i-a10-mbus.yaml @@ -30,6 +30,7 @@ properties: enum: - allwinner,sun5i-a13-mbus - allwinner,sun8i-h3-mbus + - allwinner,sun50i-a64-mbus reg: maxItems: 1 -- cgit v1.2.3 From a7f3e0bbf2c14abc60fb2e51c26b16fa642bf052 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Mon, 10 Feb 2020 18:06:55 +0100 Subject: media: dt-bindings: media: Add Allwinner A64 deinterlace compatible Allwinner A64 SoC also contains deinterlace core, compatible to H3. Add compatible string for it. Acked-by: Rob Herring Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard --- .../devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml b/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml index 2e40f700e84f..8707df613f6c 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml @@ -17,7 +17,11 @@ description: |- properties: compatible: - const: allwinner,sun8i-h3-deinterlace + oneOf: + - const: allwinner,sun8i-h3-deinterlace + - items: + - const: allwinner,sun50i-a64-deinterlace + - const: allwinner,sun8i-h3-deinterlace reg: maxItems: 1 -- cgit v1.2.3 From 65c38513528ffe673a0a9568593b475b16a7031c Mon Sep 17 00:00:00 2001 From: Saravanan Sekar Date: Tue, 4 Feb 2020 12:04:17 +0100 Subject: dt-bindings: regulator: add document bindings for mp5416 Add device tree binding information for mp5416 regulator driver. Signed-off-by: Saravanan Sekar Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200204110419.25933-2-sravanhome@gmail.com Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/mps,mp5416.yaml | 78 ++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 Documentation/devicetree/bindings/regulator/mps,mp5416.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/regulator/mps,mp5416.yaml b/Documentation/devicetree/bindings/regulator/mps,mp5416.yaml new file mode 100644 index 000000000000..f0acce2029fd --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/mps,mp5416.yaml @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/regulator/mps,mp5416.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Monolithic Power System MP5416 PMIC + +maintainers: + - Saravanan Sekar + +properties: + $nodename: + pattern: "^pmic@[0-9a-f]{1,2}$" + compatible: + enum: + - mps,mp5416 + + reg: + maxItems: 1 + + regulators: + type: object + description: | + list of regulators provided by this controller, must be named + after their hardware counterparts BUCK[1-4] and LDO[1-4] + + patternProperties: + "^buck[1-4]$": + allOf: + - $ref: "regulator.yaml#" + type: object + + "^ldo[1-4]$": + allOf: + - $ref: "regulator.yaml#" + type: object + + additionalProperties: false + additionalProperties: false + +required: + - compatible + - reg + - regulators + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + pmic@69 { + compatible = "mps,mp5416"; + reg = <0x69>; + + regulators { + + buck1 { + regulator-name = "buck1"; + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <2187500>; + regulator-min-microamp = <3800000>; + regulator-max-microamp = <6800000>; + regulator-boot-on; + }; + + ldo2 { + regulator-name = "ldo2"; + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <3975000>; + }; + }; + }; + }; +... -- cgit v1.2.3 From 78af6edf180d4f007561fba735d0c5dbd1739e90 Mon Sep 17 00:00:00 2001 From: Chuanhong Guo Date: Mon, 10 Feb 2020 11:41:52 +0800 Subject: dt-binding: spi: add bindings for spi-ar934x Add binding documentation for SPI controller in Qualcomm Atheros AR934x/QCA95xx SoCs. Signed-off-by: Chuanhong Guo Link: https://lore.kernel.org/r/20200210034152.49063-3-gch981213@gmail.com Signed-off-by: Mark Brown --- .../devicetree/bindings/spi/qca,ar934x-spi.yaml | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 Documentation/devicetree/bindings/spi/qca,ar934x-spi.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/spi/qca,ar934x-spi.yaml b/Documentation/devicetree/bindings/spi/qca,ar934x-spi.yaml new file mode 100644 index 000000000000..2aa766759d59 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/qca,ar934x-spi.yaml @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/spi/qca,ar934x-spi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Atheros AR934x/QCA95xx SoC SPI controller + +maintainers: + - Chuanhong Guo + +allOf: + - $ref: spi-controller.yaml# + +properties: + compatible: + const: qca,ar934x-spi + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + +required: + - compatible + - reg + - clocks + - '#address-cells' + - '#size-cells' + +examples: + - | + #include + spi: spi@1f000000 { + compatible = "qca,ar934x-spi"; + reg = <0x1f000000 0x1c>; + clocks = <&pll ATH79_CLK_AHB>; + #address-cells = <1>; + #size-cells = <0>; + }; -- cgit v1.2.3 From 7354de9c6e2c4ada06fbf119221eec4ec9eeb3c3 Mon Sep 17 00:00:00 2001 From: Tamizh Chelvam Date: Tue, 28 Jan 2020 00:48:55 +0530 Subject: dt-bindings: ath10k: Add new dt entries to identify coex support This adds new dt entries qcom,coexist-support and qcom,coexist-gpio-pin which will be used by ath10k driver to identify coex support of a hardware and notify wifi firmware the gpio pin number. This pin number information is needed for the hardware QCA4019. Signed-off-by: Tamizh Chelvam Reviewed-by: Rob Herring Signed-off-by: Kalle Valo --- Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt index 616c87746d6f..71bf91f97386 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt @@ -91,6 +91,11 @@ Optional properties: - qcom,msa-fixed-perm: Boolean context flag to disable SCM call for statically mapped msa region. +- qcom,coexist-support : should contain eithr "0" or "1" to indicate coex + support by the hardware. +- qcom,coexist-gpio-pin : gpio pin number information to support coex + which will be used by wifi firmware. + Example (to supply PCI based wifi block details): In this example, the node is defined as child node of the PCI controller. @@ -159,6 +164,8 @@ wifi0: wifi@a000000 { qcom,msi_addr = <0x0b006040>; qcom,msi_base = <0x40>; qcom,ath10k-pre-calibration-data = [ 01 02 03 ... ]; + qcom,coexist-support = <1>; + qcom,coexist-gpio-pin = <0x33>; }; Example (to supply wcn3990 SoC wifi block details): -- cgit v1.2.3 From 6caea21957c197bf982ee988ae46af43d8254832 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Tue, 11 Feb 2020 14:17:17 +0200 Subject: dt-bindings: panel-simple: Add rocktech,rk101ii01d-ct compatible Add compatible to panel-simple for Rocktech Displays Limited RK101II01D-CT 10.1" TFT 1280x800 Pixels with LVDS interface, LED Backlight, and capacitive touch panel ("goodix,gt928" compatible). Signed-off-by: Jyri Sarha Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/2ffedf45e65943a021ed57a0d174900e43663b27.1581423249.git.jsarha@ti.com --- Documentation/devicetree/bindings/display/panel/panel-simple.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index cf23b0ad077a..e717018d34ae 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -201,6 +201,8 @@ properties: - osddisplays,osd101t2045-53ts # QiaoDian XianShi Corporation 4"3 TFT LCD panel - qiaodian,qd43003c0-40 + # Rocktech Displays Ltd. RK101II01D-CT 10.1" TFT 1280x800 + - rocktech,rk101ii01d-ct # Rocktech Display Ltd. RK070ER9427 800(RGB)x480 TFT LCD panel - rocktech,rk070er9427 # Samsung 12.2" (2560x1600 pixels) TFT LCD panel -- cgit v1.2.3 From 1565e8e8ef0bbccabf62152b93c6505b8041b1e5 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 11 Feb 2020 20:48:24 +0800 Subject: dt-bindings: spi: imx: Add i.MX8MM/i.MX8MN/i.MX8MP compatible Add compatible for imx8mm/imx8mn/imx8mp. Signed-off-by: Anson Huang Acked-by: Rob Herring Link: https://lore.kernel.org/r/1581425307-18567-1-git-send-email-Anson.Huang@nxp.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt index 2d3264140cc5..33bc58f4cf4b 100644 --- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt +++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt @@ -10,7 +10,10 @@ Required properties: - "fsl,imx35-cspi" for SPI compatible with the one integrated on i.MX35 - "fsl,imx51-ecspi" for SPI compatible with the one integrated on i.MX51 - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc - - "fsl,imx8mq-ecspi" for SPI compatible with the one integrated on i.MX8M + - "fsl,imx8mq-ecspi" for SPI compatible with the one integrated on i.MX8MQ + - "fsl,imx8mm-ecspi" for SPI compatible with the one integrated on i.MX8MM + - "fsl,imx8mn-ecspi" for SPI compatible with the one integrated on i.MX8MN + - "fsl,imx8mp-ecspi" for SPI compatible with the one integrated on i.MX8MP - reg : Offset and length of the register set for the device - interrupts : Should contain CSPI/eCSPI interrupt - clocks : Clock specifiers for both ipg and per clocks. -- cgit v1.2.3 From d49850110434e25b3128a4f2f083812a111bf457 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 24 Jan 2020 09:41:30 +0100 Subject: dt-bindings: usb: dwc2: add support for STM32MP15 SoCs USB OTG HS and FS Add the specific compatible string for the DWC2 IP found in the STM32MP15 SoCs. STM32MP15 SoCs uses sensing comparators to detect Vbus valid levels and ID pin state. usb33d-supply described the regulator supplying Vbus and ID sensing comparators. Signed-off-by: Amelie Delaunay Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/usb/dwc2.yaml | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/usb/dwc2.yaml b/Documentation/devicetree/bindings/usb/dwc2.yaml index 71cf7ba32237..e95ba9373023 100644 --- a/Documentation/devicetree/bindings/usb/dwc2.yaml +++ b/Documentation/devicetree/bindings/usb/dwc2.yaml @@ -58,6 +58,8 @@ properties: - const: st,stm32f4x9-fsotg - const: st,stm32f4x9-hsotg - const: st,stm32f7-hsotg + - const: st,stm32mp15-fsotg + - const: st,stm32mp15-hsotg - const: samsung,s3c6400-hsotg reg: @@ -103,6 +105,10 @@ properties: vusb_a-supply: description: phandle to voltage regulator of analog section. + vusb33d-supply: + description: reference to the VBUS and ID sensing comparators supply, in + order to perform OTG operation, used on STM32MP15 SoCs. + dr_mode: enum: [host, peripheral, otg] -- cgit v1.2.3 From 17ff9478ffa3ade728d0fd7351720bb215dcbf7f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 27 Jan 2020 03:20:21 +0100 Subject: dt-bindings: Add ITE Tech prefix Add vendor prefix for ITE Tech Inc, http://www.ite.com.tw/ Signed-off-by: Marek Vasut Cc: Daniel Vetter Cc: Rob Herring Cc: Sean Cross Cc: devicetree@vger.kernel.org To: dri-devel@lists.freedesktop.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 9e67944bec9c..535211eeb1c5 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -469,6 +469,8 @@ patternProperties: description: Intersil "^issi,.*": description: Integrated Silicon Solutions Inc. + "^ite,.*": + description: ITE Tech, Inc. "^itead,.*": description: ITEAD Intelligent Systems Co.Ltd "^iwave,.*": -- cgit v1.2.3 From db1b4b3a7ae19d1abc6d52e9b3dc05b4bb99320f Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 27 Jan 2020 03:58:37 +0000 Subject: dt-bindings: soc: imx: add binding doc for aips bus Add binding doc for fsl,aips-bus Signed-off-by: Peng Fan Signed-off-by: Rob Herring --- .../devicetree/bindings/soc/imx/fsl,aips-bus.yaml | 47 ++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 Documentation/devicetree/bindings/soc/imx/fsl,aips-bus.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/soc/imx/fsl,aips-bus.yaml b/Documentation/devicetree/bindings/soc/imx/fsl,aips-bus.yaml new file mode 100644 index 000000000000..3cbf2d28a188 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/imx/fsl,aips-bus.yaml @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/soc/imx/fsl,aips-bus.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: i.MX AHB to IP Bridge + +maintainers: + - Peng Fan + +description: | + This particular peripheral is designed as the bridge between + AHB bus and peripherals with the lower bandwidth IP Slave (IPS) + buses. + +select: + properties: + compatible: + contains: + const: fsl,aips-bus + required: + - compatible + +properties: + compatible: + items: + - const: fsl,aips-bus + - const: simple-bus + + reg: + maxItems: 1 + +required: + - compatible + - reg + +examples: + - | + bus@30000000 { + compatible = "fsl,aips-bus", "simple-bus"; + reg = <0x30000000 0x400000>; + #address-cells = <1>; + #size-cells = <1>; + ranges; + }; +... -- cgit v1.2.3 From c03b401250cada0a46cdbbcef665548e576f345f Mon Sep 17 00:00:00 2001 From: Kamlesh Gurudasani Date: Mon, 27 Jan 2020 19:55:33 +0530 Subject: dt-bindings: add vendor prefix for OzzMaker and Waveshare Electronics Add vendor prefix for OzzMaker [1] and Waveshare Electronics [2] Both are display manufacturers [1] https://ozzmaker.com/about/ [2] https://www.waveshare.com/contact_us Signed-off-by: Kamlesh Gurudasani Acked-by: Sam Ravnborg Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 535211eeb1c5..ac6aa3332b28 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -729,6 +729,8 @@ patternProperties: description: OmniVision Technologies "^oxsemi,.*": description: Oxford Semiconductor, Ltd. + "^ozzmaker,.*": + description: OzzMaker "^panasonic,.*": description: Panasonic Corporation "^parade,.*": @@ -1054,6 +1056,8 @@ patternProperties: description: Vision Optical Technology Co., Ltd. "^vxt,.*": description: VXT Ltd + "^waveshare,.*": + description: Waveshare Electronics "^wd,.*": description: Western Digital Corp. "^wetek,.*": -- cgit v1.2.3 From 90aeca875f8a33da7cbcac20b7921ca090127346 Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Wed, 29 Jan 2020 09:56:13 +0100 Subject: dt-bindings: display: Convert etnaviv to json-schema Convert etnaviv bindings to yaml format. Move bindings file from display to gpu folder. Signed-off-by: Benjamin Gaignard Signed-off-by: Rob Herring --- .../bindings/display/etnaviv/etnaviv-drm.txt | 36 ----------- .../devicetree/bindings/gpu/vivante,gc.yaml | 69 ++++++++++++++++++++++ 2 files changed, 69 insertions(+), 36 deletions(-) delete mode 100644 Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt create mode 100644 Documentation/devicetree/bindings/gpu/vivante,gc.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt b/Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt deleted file mode 100644 index 8def11b16a24..000000000000 --- a/Documentation/devicetree/bindings/display/etnaviv/etnaviv-drm.txt +++ /dev/null @@ -1,36 +0,0 @@ -Vivante GPU core devices -======================== - -Required properties: -- compatible: Should be "vivante,gc" - A more specific compatible is not needed, as the cores contain chip - identification registers at fixed locations, which provide all the - necessary information to the driver. -- reg: should be register base and length as documented in the - datasheet -- interrupts: Should contain the cores interrupt line -- clocks: should contain one clock for entry in clock-names - see Documentation/devicetree/bindings/clock/clock-bindings.txt -- clock-names: - - "bus": AXI/master interface clock - - "reg": AHB/slave interface clock - (only required if GPU can gate slave interface independently) - - "core": GPU core clock - - "shader": Shader clock (only required if GPU has feature PIPE_3D) - -Optional properties: -- power-domains: a power domain consumer specifier according to - Documentation/devicetree/bindings/power/power_domain.txt - -example: - -gpu_3d: gpu@130000 { - compatible = "vivante,gc"; - reg = <0x00130000 0x4000>; - interrupts = <0 9 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX6QDL_CLK_GPU3D_AXI>, - <&clks IMX6QDL_CLK_GPU3D_CORE>, - <&clks IMX6QDL_CLK_GPU3D_SHADER>; - clock-names = "bus", "core", "shader"; - power-domains = <&gpc 1>; -}; diff --git a/Documentation/devicetree/bindings/gpu/vivante,gc.yaml b/Documentation/devicetree/bindings/gpu/vivante,gc.yaml new file mode 100644 index 000000000000..0bc4b38d5cbb --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/vivante,gc.yaml @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/gpu/vivante,gc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Vivante GPU Bindings + +description: Vivante GPU core devices + +maintainers: + - Lucas Stach + +properties: + compatible: + const: vivante,gc + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + items: + - description: AXI/master interface clock + - description: GPU core clock + - description: Shader clock (only required if GPU has feature PIPE_3D) + - description: AHB/slave interface clock (only required if GPU can gate slave interface independently) + minItems: 1 + maxItems: 4 + + clock-names: + items: + enum: [ bus, core, shader, reg ] + minItems: 1 + maxItems: 4 + + resets: + maxItems: 1 + + power-domains: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + +additionalProperties: false + +examples: + - | + #include + #include + gpu@130000 { + compatible = "vivante,gc"; + reg = <0x00130000 0x4000>; + interrupts = <0 9 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clks IMX6QDL_CLK_GPU3D_AXI>, + <&clks IMX6QDL_CLK_GPU3D_CORE>, + <&clks IMX6QDL_CLK_GPU3D_SHADER>; + clock-names = "bus", "core", "shader"; + power-domains = <&gpc 1>; + }; + +... -- cgit v1.2.3 From 32c5cd478983387620eb9941b42c95877735ed8a Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 10 Dec 2019 00:43:48 +0100 Subject: dt-bindings: arm: fsl: add LS1028A based boards Add the Freescale LS1028A evaluation boards. Signed-off-by: Michael Walle Reviewed-by: Rob Herring Signed-off-by: Shawn Guo --- Documentation/devicetree/bindings/arm/fsl.yaml | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml index a8e0b4a813ed..46219a83d955 100644 --- a/Documentation/devicetree/bindings/arm/fsl.yaml +++ b/Documentation/devicetree/bindings/arm/fsl.yaml @@ -395,6 +395,13 @@ properties: - fsl,ls1021a-twr - const: fsl,ls1021a + - description: LS1028A based Boards + items: + - enum: + - fsl,ls1028a-qds + - fsl,ls1028a-rdb + - const: fsl,ls1028a + - description: LS1043A based Boards items: - enum: -- cgit v1.2.3 From e5ede2cc965bdc25e16bb477603dc2509df5a892 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 10 Dec 2019 00:43:49 +0100 Subject: dt-bindings: arm: fsl: add Kontron sl28 boards Add the Kontron SMARC-sAL28 board, its variants and combination with carriers. Signed-off-by: Michael Walle Reviewed-by: Rob Herring Signed-off-by: Shawn Guo --- Documentation/devicetree/bindings/arm/fsl.yaml | 38 ++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml index 46219a83d955..49ab2d83b744 100644 --- a/Documentation/devicetree/bindings/arm/fsl.yaml +++ b/Documentation/devicetree/bindings/arm/fsl.yaml @@ -402,6 +402,44 @@ properties: - fsl,ls1028a-rdb - const: fsl,ls1028a + - description: Kontron KBox A-230-LS + items: + - const: kontron,kbox-a-230-ls + - const: kontron,sl28-var4 + - const: kontron,sl28 + - const: fsl,ls1028a + - description: + Kontron SMARC-sAL28 board on the SMARC Eval Carrier 2.0 + items: + - enum: + - kontron,sl28-var2-ads2 + - kontron,sl28-var3-ads2 + - kontron,sl28-var4-ads2 + - enum: + - kontron,sl28-var2 + - kontron,sl28-var3 + - kontron,sl28-var4 + - const: kontron,sl28 + - const: fsl,ls1028a + + - description: + Kontron SMARC-sAL28 board (on a generic/undefined carrier) + items: + - enum: + - kontron,sl28-var2 + - kontron,sl28-var3 + - kontron,sl28-var4 + - const: kontron,sl28 + - const: fsl,ls1028a + + - description: + Kontron SMARC-sAL28 board (base). This is used in the base device + tree which is compatible with the overlays provided by the + vendor. + items: + - const: kontron,sl28 + - const: fsl,ls1028a + - description: LS1043A based Boards items: - enum: -- cgit v1.2.3 From 57f0a29c3e089b74a42ff62289a47512ab3f8eba Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Sun, 26 Jan 2020 21:00:13 +0100 Subject: DTS: bindings: wl1251: mark ti,power-gpio as optional It is now only useful for SPI interface. Power control of SDIO mode is done through mmc core. Suggested by: Ulf Hansson Signed-off-by: H. Nikolaus Schaller Signed-off-by: Kalle Valo --- Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt b/Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt index f38950560982..88fd28d15eac 100644 --- a/Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt +++ b/Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt @@ -9,11 +9,12 @@ Required properties: - spi-max-frequency : Maximum SPI clocking speed of device in Hz - interrupts : Should contain interrupt line - vio-supply : phandle to regulator providing VIO -- ti,power-gpio : GPIO connected to chip's PMEN pin Optional properties: - ti,wl1251-has-eeprom : boolean, the wl1251 has an eeprom connected, which provides configuration data (calibration, MAC, ...) +- ti,power-gpio : GPIO connected to chip's PMEN pin if operated in + SPI mode - Please consult Documentation/devicetree/bindings/spi/spi-bus.txt for optional SPI connection related properties, -- cgit v1.2.3 From ae4ba35d363816833ae3e72ecf5d2898c4996baf Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 11 Feb 2020 14:25:28 +0300 Subject: usb: typec: Allow power role swapping even without USB PD Even though originally the USB Type-C Specification did not describe the steps for power role swapping without USB PD contract in place, it did not actually mean power role swap without USB PD was not allowed. The USB Type-C Specification did not clearly separate the data and power roles until in the release 1.2 which is why there also were no clear steps for the scenario where only the power role was swapped without USB PD contract before that. Since in the latest version of the specification the power role swap without USB PD is now clearly mentioned as allowed operation, removing the check that prevented power role swap without USB PD support. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200211112531.86510-4-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-class-typec | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-class-typec b/Documentation/ABI/testing/sysfs-class-typec index d7647b258c3c..0c2eb26fdc06 100644 --- a/Documentation/ABI/testing/sysfs-class-typec +++ b/Documentation/ABI/testing/sysfs-class-typec @@ -20,13 +20,13 @@ Date: April 2017 Contact: Heikki Krogerus Description: The supported power roles. This attribute can be used to request - power role swap on the port when the port supports USB Power - Delivery. Swapping is supported as synchronous operation, so - write(2) to the attribute will not return until the operation - has finished. The attribute is notified about role changes so - that poll(2) on the attribute wakes up. Change on the role will - also generate uevent KOBJ_CHANGE. The current role is show in - brackets, for example "[source] sink" when in source mode. + power role swap on the port. Swapping is supported as + synchronous operation, so write(2) to the attribute will not + return until the operation has finished. The attribute is + notified about role changes so that poll(2) on the attribute + wakes up. Change on the role will also generate uevent + KOBJ_CHANGE. The current role is show in brackets, for example + "[source] sink" when in source mode. Valid values: source, sink -- cgit v1.2.3 From bbe80c9a89b868e98ef0710cb03ee68dd78a4d8d Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 11 Feb 2020 14:25:30 +0300 Subject: usb: typec: altmode: Remove the notification chain Using the generic notification chain is not reasonable with the alternate modes because it would require dependencies between the drivers of the components that need the notifications, and the typec drivers. There are no users for the alternate mode notifications, so removing the chain and the API for it completely. Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200211112531.86510-6-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/usb/typec_bus.rst | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'Documentation') diff --git a/Documentation/driver-api/usb/typec_bus.rst b/Documentation/driver-api/usb/typec_bus.rst index f47a69bff498..03dfa9c018b7 100644 --- a/Documentation/driver-api/usb/typec_bus.rst +++ b/Documentation/driver-api/usb/typec_bus.rst @@ -53,9 +53,7 @@ in need to reconfigure the pins on the connector, the alternate mode driver needs to notify the bus using :c:func:`typec_altmode_notify()`. The driver passes the negotiated SVID specific pin configuration value to the function as parameter. The bus driver will then configure the mux behind the connector using -that value as the state value for the mux, and also call blocking notification -chain to notify the external drivers about the state of the connector that need -to know it. +that value as the state value for the mux. NOTE: The SVID specific pin configuration values must always start from ``TYPEC_STATE_MODAL``. USB Type-C specification defines two default states for @@ -80,19 +78,6 @@ Helper macro ``TYPEC_MODAL_STATE()`` can also be used:: #define ALTMODEX_CONF_A = TYPEC_MODAL_STATE(0); #define ALTMODEX_CONF_B = TYPEC_MODAL_STATE(1); -Notification chain -~~~~~~~~~~~~~~~~~~ - -The drivers for the components that the alternate modes are designed for need to -get details regarding the results of the negotiation with the partner, and the -pin configuration of the connector. In case of DisplayPort alternate mode for -example, the GPU drivers will need to know those details. In case of -Thunderbolt alternate mode, the thunderbolt drivers will need to know them, and -so on. - -The notification chain is designed for this purpose. The drivers can register -notifiers with :c:func:`typec_altmode_register_notifier()`. - Cable plug alternate modes ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -129,8 +114,3 @@ Cable Plug operations .. kernel-doc:: drivers/usb/typec/bus.c :functions: typec_altmode_get_plug typec_altmode_put_plug - -Notifications -~~~~~~~~~~~~~ -.. kernel-doc:: drivers/usb/typec/class.c - :functions: typec_altmode_register_notifier typec_altmode_unregister_notifier -- cgit v1.2.3 From 239a5791ffd5559f51815df442c4dbbe7fc21ade Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 10 Feb 2020 13:11:42 -0800 Subject: dynamic_debug: allow to work if debugfs is disabled With the realization that having debugfs enabled on "production" systems is generally not a good idea, debugfs is being disabled from more and more platforms over time. However, the functionality of dynamic debugging still is needed at times, and since it relies on debugfs for its user api, having debugfs disabled also forces dynamic debug to be disabled. To get around this, also create the "control" file for dynamic_debug in procfs. This allows people turn on debugging as needed at runtime for individual driverfs and subsystems. Reported-by: many different companies Cc: Jason Baron Acked-by: Will Deacon Link: https://lore.kernel.org/r/20200210211142.GB1373304@kroah.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/dynamic-debug-howto.rst | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst b/Documentation/admin-guide/dynamic-debug-howto.rst index 252e5ef324e5..0dc2eb8e44e5 100644 --- a/Documentation/admin-guide/dynamic-debug-howto.rst +++ b/Documentation/admin-guide/dynamic-debug-howto.rst @@ -54,6 +54,9 @@ If you make a mistake with the syntax, the write will fail thus:: /dynamic_debug/control -bash: echo: write error: Invalid argument +Note, for systems without 'debugfs' enabled, the control file can be +found in ``/proc/dynamic_debug/control``. + Viewing Dynamic Debug Behaviour =============================== -- cgit v1.2.3 From fdd373a4e0c859c64149aaacd082b6f4e58a6489 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Fri, 24 Jan 2020 14:32:21 -0800 Subject: dt-bindings: clock: Add RPMHCC bindings for SM8250 Add bindings and update documentation for clock rpmh driver on SM8250. Acked-by: Rob Herring Reviewed-by: Vinod Koul Reviewed-by: Bjorn Andersson Signed-off-by: Taniya Das Signed-off-by: Venkata Narendra Kumar Gutta Link: https://lkml.kernel.org/r/1579905147-12142-2-git-send-email-vnkgutta@codeaurora.org Signed-off-by: Stephen Boyd --- Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml index 2cd158f13bab..2b633a4b0bc2 100644 --- a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml @@ -20,6 +20,7 @@ properties: - qcom,sc7180-rpmh-clk - qcom,sdm845-rpmh-clk - qcom,sm8150-rpmh-clk + - qcom,sm8250-rpmh-clk clocks: maxItems: 1 -- cgit v1.2.3 From eadd54c75f1ef1566a6fe004787c028eb095f8b4 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Mon, 27 Jan 2020 10:18:06 +0100 Subject: dt-bindings: Convert the binding file google, cros-ec-codec.txt to yaml format. This was tested and verified with: make dt_binding_check DT_SCHEMA_FILES=Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml Signed-off-by: Dafna Hirschfeld Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200127091806.11403-1-dafna.hirschfeld@collabora.com Signed-off-by: Mark Brown --- .../bindings/sound/google,cros-ec-codec.txt | 44 --------------- .../bindings/sound/google,cros-ec-codec.yaml | 62 ++++++++++++++++++++++ 2 files changed, 62 insertions(+), 44 deletions(-) delete mode 100644 Documentation/devicetree/bindings/sound/google,cros-ec-codec.txt create mode 100644 Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.txt b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.txt deleted file mode 100644 index 8ca52dcc5572..000000000000 --- a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.txt +++ /dev/null @@ -1,44 +0,0 @@ -Audio codec controlled by ChromeOS EC - -Google's ChromeOS EC codec is a digital mic codec provided by the -Embedded Controller (EC) and is controlled via a host-command interface. - -An EC codec node should only be found as a sub-node of the EC node (see -Documentation/devicetree/bindings/mfd/cros-ec.txt). - -Required properties: -- compatible: Must contain "google,cros-ec-codec" -- #sound-dai-cells: Should be 1. The cell specifies number of DAIs. - -Optional properties: -- reg: Pysical base address and length of shared memory region from EC. - It contains 3 unsigned 32-bit integer. The first 2 integers - combine to become an unsigned 64-bit physical address. The last - one integer is length of the shared memory. -- memory-region: Shared memory region to EC. A "shared-dma-pool". See - ../reserved-memory/reserved-memory.txt for details. - -Example: - -{ - ... - - reserved_mem: reserved_mem { - compatible = "shared-dma-pool"; - reg = <0 0x52800000 0 0x100000>; - no-map; - }; -} - -cros-ec@0 { - compatible = "google,cros-ec-spi"; - - ... - - cros_ec_codec: ec-codec { - compatible = "google,cros-ec-codec"; - #sound-dai-cells = <1>; - reg = <0x0 0x10500000 0x80000>; - memory-region = <&reserved_mem>; - }; -}; diff --git a/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml new file mode 100644 index 000000000000..94a85d0cbf43 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/google,cros-ec-codec.yaml @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: GPL-2.0-only +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sound/google,cros-ec-codec.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Audio codec controlled by ChromeOS EC + +maintainers: + - Cheng-Yi Chiang + +description: | + Google's ChromeOS EC codec is a digital mic codec provided by the + Embedded Controller (EC) and is controlled via a host-command interface. + An EC codec node should only be found as a sub-node of the EC node (see + Documentation/devicetree/bindings/mfd/cros-ec.txt). + +properties: + compatible: + const: google,cros-ec-codec + + "#sound-dai-cells": + const: 1 + + reg: + items: + - description: | + Physical base address and length of shared memory region from EC. + It contains 3 unsigned 32-bit integer. The first 2 integers + combine to become an unsigned 64-bit physical address. + The last one integer is the length of the shared memory. + + memory-region: + $ref: '/schemas/types.yaml#/definitions/phandle' + description: | + Shared memory region to EC. A "shared-dma-pool". + See ../reserved-memory/reserved-memory.txt for details. + +required: + - compatible + - '#sound-dai-cells' + +additionalProperties: false + +examples: + - | + reserved_mem: reserved_mem { + compatible = "shared-dma-pool"; + reg = <0 0x52800000 0 0x100000>; + no-map; + }; + cros-ec@0 { + compatible = "google,cros-ec-spi"; + #address-cells = <2>; + #size-cells = <1>; + cros_ec_codec: ec-codec { + compatible = "google,cros-ec-codec"; + #sound-dai-cells = <1>; + reg = <0x0 0x10500000 0x80000>; + memory-region = <&reserved_mem>; + }; + }; -- cgit v1.2.3 From d548ed71cb8862c96a1a8d17861bb5dabd1e2299 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Tue, 4 Feb 2020 16:28:37 +1300 Subject: dt-bindings: spi: Document binding for generic SPI multiplexer Add binding documentation for the spi-mux driver. This allows a generic multiplexer to be used to provide access to multiple SPI devices. Signed-off-by: Chris Packham Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200204032838.20739-2-chris.packham@alliedtelesis.co.nz Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-mux.yaml | 89 ++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 Documentation/devicetree/bindings/spi/spi-mux.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/spi/spi-mux.yaml b/Documentation/devicetree/bindings/spi/spi-mux.yaml new file mode 100644 index 000000000000..0ae692dc28b5 --- /dev/null +++ b/Documentation/devicetree/bindings/spi/spi-mux.yaml @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/spi/spi-mux.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Generic SPI Multiplexer + +description: | + This binding describes a SPI bus multiplexer to route the SPI chip select + signals. This can be used when you need more devices than the SPI controller + has chip selects available. An example setup is shown in ASCII art; the actual + setting of the multiplexer to a channel needs to be done by a specific SPI mux + driver. + + MOSI /--------------------------------+--------+--------+--------\ + MISO |/------------------------------+|-------+|-------+|-------\| + SCL ||/----------------------------+||------+||------+||------\|| + ||| ||| ||| ||| ||| + +------------+ ||| ||| ||| ||| + | SoC ||| | +-+++-+ +-+++-+ +-+++-+ +-+++-+ + | ||| | | dev | | dev | | dev | | dev | + | +--+++-+ | CS-X +------+\ +--+--+ +--+--+ +--+--+ +--+--+ + | | SPI +-|-------+ Mux |\\ CS-0 | | | | + | +------+ | +--+---+\\\-------/ CS-1 | | | + | | | \\\----------------/ CS-2 | | + | +------+ | | \\-------------------------/ CS-3 | + | | ? +-|----------/ \----------------------------------/ + | +------+ | + +------------+ + +allOf: + - $ref: "/schemas/spi/spi-controller.yaml#" + +maintainers: + - Chris Packham + +properties: + compatible: + const: spi-mux + + mux-controls: + maxItems: 1 + +required: + - compatible + - reg + - spi-max-frequency + - mux-controls + +examples: + - | + #include + mux: mux-controller { + compatible = "gpio-mux"; + #mux-control-cells = <0>; + + mux-gpios = <&gpio0 3 GPIO_ACTIVE_HIGH>; + }; + + spi { + #address-cells = <1>; + #size-cells = <0>; + spi@0 { + compatible = "spi-mux"; + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + spi-max-frequency = <100000000>; + + mux-controls = <&mux>; + + spi-flash@0 { + compatible = "jedec,spi-nor"; + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + spi-max-frequency = <40000000>; + }; + + spi-device@1 { + compatible = "lineartechnology,ltc2488"; + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + spi-max-frequency = <10000000>; + }; + }; + }; -- cgit v1.2.3 From b8fa484376901b812a21549962ffed075b77befd Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 14 Jan 2020 09:26:05 +0800 Subject: dt-bindings: clock: Convert i.MX8MQ to json-schema Convert the i.MX8MQ clock binding to DT schema format using json-schema Signed-off-by: Anson Huang Reviewed-by: Rob Herring Signed-off-by: Shawn Guo --- .../devicetree/bindings/clock/imx8mq-clock.txt | 20 ------ .../devicetree/bindings/clock/imx8mq-clock.yaml | 72 ++++++++++++++++++++++ 2 files changed, 72 insertions(+), 20 deletions(-) delete mode 100644 Documentation/devicetree/bindings/clock/imx8mq-clock.txt create mode 100644 Documentation/devicetree/bindings/clock/imx8mq-clock.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/clock/imx8mq-clock.txt b/Documentation/devicetree/bindings/clock/imx8mq-clock.txt deleted file mode 100644 index 52de8263e012..000000000000 --- a/Documentation/devicetree/bindings/clock/imx8mq-clock.txt +++ /dev/null @@ -1,20 +0,0 @@ -* Clock bindings for NXP i.MX8M Quad - -Required properties: -- compatible: Should be "fsl,imx8mq-ccm" -- reg: Address and length of the register set -- #clock-cells: Should be <1> -- clocks: list of clock specifiers, must contain an entry for each required - entry in clock-names -- clock-names: should include the following entries: - - "ckil" - - "osc_25m" - - "osc_27m" - - "clk_ext1" - - "clk_ext2" - - "clk_ext3" - - "clk_ext4" - -The clock consumer should specify the desired clock by having the clock -ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx8mq-clock.h -for the full list of i.MX8M Quad clock IDs. diff --git a/Documentation/devicetree/bindings/clock/imx8mq-clock.yaml b/Documentation/devicetree/bindings/clock/imx8mq-clock.yaml new file mode 100644 index 000000000000..77790f0fdcd3 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/imx8mq-clock.yaml @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/bindings/clock/imx8mq-clock.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP i.MX8M Quad Clock Control Module Binding + +maintainers: + - Anson Huang + +description: | + NXP i.MX8M Quad clock control module is an integrated clock controller, which + generates and supplies to all modules. + +properties: + compatible: + const: fsl,imx8mq-ccm + + reg: + maxItems: 1 + + clocks: + items: + - description: 32k osc + - description: 25m osc + - description: 27m osc + - description: ext1 clock input + - description: ext2 clock input + - description: ext3 clock input + - description: ext4 clock input + + clock-names: + items: + - const: ckil + - const: osc_25m + - const: osc_27m + - const: clk_ext1 + - const: clk_ext2 + - const: clk_ext3 + - const: clk_ext4 + + '#clock-cells': + const: 1 + description: + The clock consumer should specify the desired clock by having the clock + ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx8mq-clock.h + for the full list of i.MX8M Quad clock IDs. + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + +examples: + # Clock Control Module node: + - | + clk: clock-controller@30380000 { + compatible = "fsl,imx8mq-ccm"; + reg = <0x30380000 0x10000>; + #clock-cells = <1>; + clocks = <&ckil>, <&osc_25m>, <&osc_27m>, + <&clk_ext1>, <&clk_ext2>, + <&clk_ext3>, <&clk_ext4>; + clock-names = "ckil", "osc_25m", "osc_27m", + "clk_ext1", "clk_ext2", + "clk_ext3", "clk_ext4"; + }; + +... -- cgit v1.2.3 From 16d848e11836a17702534b65007176d12246d0b3 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 14 Jan 2020 09:26:06 +0800 Subject: dt-bindings: clock: Convert i.MX8MM to json-schema Convert the i.MX8MM clock binding to DT schema format using json-schema Signed-off-by: Anson Huang Reviewed-by: Rob Herring Signed-off-by: Shawn Guo --- .../devicetree/bindings/clock/imx8mm-clock.txt | 29 --------- .../devicetree/bindings/clock/imx8mm-clock.yaml | 68 ++++++++++++++++++++++ 2 files changed, 68 insertions(+), 29 deletions(-) delete mode 100644 Documentation/devicetree/bindings/clock/imx8mm-clock.txt create mode 100644 Documentation/devicetree/bindings/clock/imx8mm-clock.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/clock/imx8mm-clock.txt b/Documentation/devicetree/bindings/clock/imx8mm-clock.txt deleted file mode 100644 index 8e4ab9e619a1..000000000000 --- a/Documentation/devicetree/bindings/clock/imx8mm-clock.txt +++ /dev/null @@ -1,29 +0,0 @@ -* Clock bindings for NXP i.MX8M Mini - -Required properties: -- compatible: Should be "fsl,imx8mm-ccm" -- reg: Address and length of the register set -- #clock-cells: Should be <1> -- clocks: list of clock specifiers, must contain an entry for each required - entry in clock-names -- clock-names: should include the following entries: - - "osc_32k" - - "osc_24m" - - "clk_ext1" - - "clk_ext2" - - "clk_ext3" - - "clk_ext4" - -clk: clock-controller@30380000 { - compatible = "fsl,imx8mm-ccm"; - reg = <0x0 0x30380000 0x0 0x10000>; - #clock-cells = <1>; - clocks = <&osc_32k>, <&osc_24m>, <&clk_ext1>, <&clk_ext2>, - <&clk_ext3>, <&clk_ext4>; - clock-names = "osc_32k", "osc_24m", "clk_ext1", "clk_ext2", - "clk_ext3", "clk_ext4"; -}; - -The clock consumer should specify the desired clock by having the clock -ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx8mm-clock.h -for the full list of i.MX8M Mini clock IDs. diff --git a/Documentation/devicetree/bindings/clock/imx8mm-clock.yaml b/Documentation/devicetree/bindings/clock/imx8mm-clock.yaml new file mode 100644 index 000000000000..f5be181bd21d --- /dev/null +++ b/Documentation/devicetree/bindings/clock/imx8mm-clock.yaml @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/bindings/clock/imx8mm-clock.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP i.MX8M Mini Clock Control Module Binding + +maintainers: + - Anson Huang + +description: | + NXP i.MX8M Mini clock control module is an integrated clock controller, which + generates and supplies to all modules. + +properties: + compatible: + const: fsl,imx8mm-ccm + + reg: + maxItems: 1 + + clocks: + items: + - description: 32k osc + - description: 24m osc + - description: ext1 clock input + - description: ext2 clock input + - description: ext3 clock input + - description: ext4 clock input + + clock-names: + items: + - const: osc_32k + - const: osc_24m + - const: clk_ext1 + - const: clk_ext2 + - const: clk_ext3 + - const: clk_ext4 + + '#clock-cells': + const: 1 + description: + The clock consumer should specify the desired clock by having the clock + ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx8mm-clock.h + for the full list of i.MX8M Mini clock IDs. + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + +examples: + # Clock Control Module node: + - | + clk: clock-controller@30380000 { + compatible = "fsl,imx8mm-ccm"; + reg = <0x30380000 0x10000>; + #clock-cells = <1>; + clocks = <&osc_32k>, <&osc_24m>, <&clk_ext1>, <&clk_ext2>, + <&clk_ext3>, <&clk_ext4>; + clock-names = "osc_32k", "osc_24m", "clk_ext1", "clk_ext2", + "clk_ext3", "clk_ext4"; + }; + +... -- cgit v1.2.3 From b86a8ad2870e100e2f3ee1ae6c7741e3d771b89d Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 14 Jan 2020 09:26:07 +0800 Subject: dt-bindings: clock: Refine i.MX8MN clock binding Refine i.MX8MN clock binding by removing useless content and updating the example, it makes all i.MX8M SoCs' clock binding aligned. Signed-off-by: Anson Huang Reviewed-by: Rob Herring Signed-off-by: Shawn Guo --- .../devicetree/bindings/clock/imx8mn-clock.yaml | 48 +--------------------- 1 file changed, 2 insertions(+), 46 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/clock/imx8mn-clock.yaml b/Documentation/devicetree/bindings/clock/imx8mn-clock.yaml index cd0b8a341321..49730474c1a7 100644 --- a/Documentation/devicetree/bindings/clock/imx8mn-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx8mn-clock.yaml @@ -40,7 +40,7 @@ properties: '#clock-cells': const: 1 - description: | + description: The clock consumer should specify the desired clock by having the clock ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx8mn-clock.h for the full list of i.MX8M Nano clock IDs. @@ -57,7 +57,7 @@ examples: - | clk: clock-controller@30380000 { compatible = "fsl,imx8mn-ccm"; - reg = <0x0 0x30380000 0x0 0x10000>; + reg = <0x30380000 0x10000>; #clock-cells = <1>; clocks = <&osc_32k>, <&osc_24m>, <&clk_ext1>, <&clk_ext2>, <&clk_ext3>, <&clk_ext4>; @@ -65,48 +65,4 @@ examples: "clk_ext2", "clk_ext3", "clk_ext4"; }; - # Required external clocks for Clock Control Module node: - - | - osc_32k: clock-osc-32k { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <32768>; - clock-output-names = "osc_32k"; - }; - - osc_24m: clock-osc-24m { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <24000000>; - clock-output-names = "osc_24m"; - }; - - clk_ext1: clock-ext1 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <133000000>; - clock-output-names = "clk_ext1"; - }; - - clk_ext2: clock-ext2 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <133000000>; - clock-output-names = "clk_ext2"; - }; - - clk_ext3: clock-ext3 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency = <133000000>; - clock-output-names = "clk_ext3"; - }; - - clk_ext4: clock-ext4 { - compatible = "fixed-clock"; - #clock-cells = <0>; - clock-frequency= <133000000>; - clock-output-names = "clk_ext4"; - }; - ... -- cgit v1.2.3 From 5a7ea52b6faec6f8877e49645a80349b2d970f0a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Feb 2020 23:58:17 +0100 Subject: PM: QoS: Drop pm_qos_update_request_timeout() The pm_qos_update_request_timeout() function is not called from anywhere, so drop it along with the work member in struct pm_qos_request needed by it. Also drop the useless pm_qos_update_request_timeout trace event that is only triggered by that function (so it never triggers at all) and update the trace events documentation accordingly. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- Documentation/trace/events-power.rst | 2 -- 1 file changed, 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/trace/events-power.rst b/Documentation/trace/events-power.rst index 2ef318962e29..eec7453a168e 100644 --- a/Documentation/trace/events-power.rst +++ b/Documentation/trace/events-power.rst @@ -78,11 +78,9 @@ target/flags update. pm_qos_add_request "pm_qos_class=%s value=%d" pm_qos_update_request "pm_qos_class=%s value=%d" pm_qos_remove_request "pm_qos_class=%s value=%d" - pm_qos_update_request_timeout "pm_qos_class=%s value=%d, timeout_us=%ld" The first parameter gives the QoS class name (e.g. "CPU_DMA_LATENCY"). The second parameter is value to be added/updated/removed. -The third parameter is timeout value in usec. :: pm_qos_update_target "action=%s prev_value=%d curr_value=%d" -- cgit v1.2.3 From 333eed7d20069e2d80446f5fdf9ac3868b55e7b9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Feb 2020 00:06:17 +0100 Subject: PM: QoS: Simplify definitions of CPU latency QoS trace events Modify the definitions of the CPU latency QoS trace events to take one argument (since PM_QOS_CPU_DMA_LATENCY is always passed as the pm_qos_class argument to them) and update the documentation of them accordingly (while at it, make it explicitly mention CPU latency QoS and relocate it after the device PM QoS trace events documentation). The names and output format of the trace events do not change to preserve user space compatibility. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- Documentation/trace/events-power.rst | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/trace/events-power.rst b/Documentation/trace/events-power.rst index eec7453a168e..f45bf11fa88d 100644 --- a/Documentation/trace/events-power.rst +++ b/Documentation/trace/events-power.rst @@ -73,14 +73,6 @@ The second parameter is the power domain target state. ================ The PM QoS events are used for QoS add/update/remove request and for target/flags update. -:: - - pm_qos_add_request "pm_qos_class=%s value=%d" - pm_qos_update_request "pm_qos_class=%s value=%d" - pm_qos_remove_request "pm_qos_class=%s value=%d" - -The first parameter gives the QoS class name (e.g. "CPU_DMA_LATENCY"). -The second parameter is value to be added/updated/removed. :: pm_qos_update_target "action=%s prev_value=%d curr_value=%d" @@ -90,7 +82,7 @@ The first parameter gives the QoS action name (e.g. "ADD_REQ"). The second parameter is the previous QoS value. The third parameter is the current QoS value to update. -And, there are also events used for device PM QoS add/update/remove request. +There are also events used for device PM QoS add/update/remove request. :: dev_pm_qos_add_request "device=%s type=%s new_value=%d" @@ -101,3 +93,12 @@ The first parameter gives the device name which tries to add/update/remove QoS requests. The second parameter gives the request type (e.g. "DEV_PM_QOS_RESUME_LATENCY"). The third parameter is value to be added/updated/removed. + +And, there are events used for CPU latency QoS add/update/remove request. +:: + + pm_qos_add_request "value=%d" + pm_qos_update_request "value=%d" + pm_qos_remove_request "value=%d" + +The parameter is the value to be added/updated/removed. -- cgit v1.2.3 From 59528807715f81f123631f57446b08219efa7526 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 14 Jan 2020 17:52:23 -0600 Subject: soundwire: stream: update state machine and add state checks The state machine and notes don't accurately explain or allow transitions from STREAM_DEPREPARED and STREAM_DISABLED. Add more explanations and allow for more transitions as a result of a trigger_stop(), trigger_suspend() and prepare(), depending on the ALSA/ASoC layer behavior defined by the INFO_RESUME and INFO_PAUSE flags. Also add basic checks to help debug inconsistent states and illegal state machine transitions. Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20200114235227.14502-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Vinod Koul --- Documentation/driver-api/soundwire/stream.rst | 61 +++++++++++++++++++-------- 1 file changed, 44 insertions(+), 17 deletions(-) (limited to 'Documentation') diff --git a/Documentation/driver-api/soundwire/stream.rst b/Documentation/driver-api/soundwire/stream.rst index 5351bd2f34a8..8bceece51554 100644 --- a/Documentation/driver-api/soundwire/stream.rst +++ b/Documentation/driver-api/soundwire/stream.rst @@ -156,22 +156,27 @@ Below shows the SoundWire stream states and state transition diagram. :: +-----------+ +------------+ +----------+ +----------+ | ALLOCATED +---->| CONFIGURED +---->| PREPARED +---->| ENABLED | | STATE | | STATE | | STATE | | STATE | - +-----------+ +------------+ +----------+ +----+-----+ - ^ - | - | - v - +----------+ +------------+ +----+-----+ + +-----------+ +------------+ +---+--+---+ +----+-----+ + ^ ^ ^ + | | | + __| |___________ | + | | | + v | v + +----------+ +-----+------+ +-+--+-----+ | RELEASED |<----------+ DEPREPARED |<-------+ DISABLED | | STATE | | STATE | | STATE | +----------+ +------------+ +----------+ -NOTE: State transition between prepare and deprepare is supported in Spec -but not in the software (subsystem) +NOTE: State transitions between ``SDW_STREAM_ENABLED`` and +``SDW_STREAM_DISABLED`` are only relevant when then INFO_PAUSE flag is +supported at the ALSA/ASoC level. Likewise the transition between +``SDW_DISABLED_STATE`` and ``SDW_PREPARED_STATE`` depends on the +INFO_RESUME flag. -NOTE2: Stream state transition checks need to be handled by caller -framework, for example ALSA/ASoC. No checks for stream transition exist in -SoundWire subsystem. +NOTE2: The framework implements basic state transition checks, but +does not e.g. check if a transition from DISABLED to ENABLED is valid +on a specific platform. Such tests need to be added at the ALSA/ASoC +level. Stream State Operations ----------------------- @@ -246,6 +251,9 @@ SDW_STREAM_PREPARED Prepare state of stream. Operations performed before entering in this state: + (0) Steps 1 and 2 are omitted in the case of a resume operation, + where the bus bandwidth is known. + (1) Bus parameters such as bandwidth, frame shape, clock frequency, are computed based on current stream as well as already active stream(s) on Bus. Re-computation is required to accommodate current @@ -270,9 +278,11 @@ Prepare state of stream. Operations performed before entering in this state: After all above operations are successful, stream state is set to ``SDW_STREAM_PREPARED``. -Bus implements below API for PREPARE state which needs to be called once per -stream. From ASoC DPCM framework, this stream state is linked to -.prepare() operation. +Bus implements below API for PREPARE state which needs to be called +once per stream. From ASoC DPCM framework, this stream state is linked +to .prepare() operation. Since the .trigger() operations may not +follow the .prepare(), a direct transition from +``SDW_STREAM_PREPARED`` to ``SDW_STREAM_DEPREPARED`` is allowed. .. code-block:: c @@ -332,6 +342,14 @@ Bus implements below API for DISABLED state which needs to be called once per stream. From ASoC DPCM framework, this stream state is linked to .trigger() stop operation. +When the INFO_PAUSE flag is supported, a direct transition to +``SDW_STREAM_ENABLED`` is allowed. + +For resume operations where ASoC will use the .prepare() callback, the +stream can transition from ``SDW_STREAM_DISABLED`` to +``SDW_STREAM_PREPARED``, with all required settings restored but +without updating the bandwidth and bit allocation. + .. code-block:: c int sdw_disable_stream(struct sdw_stream_runtime * stream); @@ -353,9 +371,18 @@ state: After all above operations are successful, stream state is set to ``SDW_STREAM_DEPREPARED``. -Bus implements below API for DEPREPARED state which needs to be called once -per stream. From ASoC DPCM framework, this stream state is linked to -.trigger() stop operation. +Bus implements below API for DEPREPARED state which needs to be called +once per stream. ALSA/ASoC do not have a concept of 'deprepare', and +the mapping from this stream state to ALSA/ASoC operation may be +implementation specific. + +When the INFO_PAUSE flag is supported, the stream state is linked to +the .hw_free() operation - the stream is not deprepared on a +TRIGGER_STOP. + +Other implementations may transition to the ``SDW_STREAM_DEPREPARED`` +state on TRIGGER_STOP, should they require a transition through the +``SDW_STREAM_PREPARED`` state. .. code-block:: c -- cgit v1.2.3 From ff095986e6b449bc095b018388519f1e413a7fa8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 12 Feb 2020 11:48:40 +0100 Subject: dt-bindings: dma: ti-edma: fix example compatible property Make sure that the compatible string in the edma1_tptc1 example node matches the binding by removing the space between the manufacturer and model. Signed-off-by: Johan Hovold Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20200212104840.20393-1-johan@kernel.org Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/ti-edma.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/ti-edma.txt b/Documentation/devicetree/bindings/dma/ti-edma.txt index 0e1398f93aa2..29fcd37082e8 100644 --- a/Documentation/devicetree/bindings/dma/ti-edma.txt +++ b/Documentation/devicetree/bindings/dma/ti-edma.txt @@ -180,7 +180,7 @@ edma1_tptc0: tptc@27b0000 { }; edma1_tptc1: tptc@27b8000 { - compatible = "ti, k2g-edma3-tptc", "ti,edma3-tptc"; + compatible = "ti,k2g-edma3-tptc", "ti,edma3-tptc"; reg = <0x027b8000 0x400>; power-domains = <&k2g_pds 0x4f>; }; -- cgit v1.2.3 From 34e75cf4beb1a88a61b7c76b5fdc99c43cff8594 Mon Sep 17 00:00:00 2001 From: "Daniel W. S. Almeida" Date: Wed, 29 Jan 2020 01:49:13 -0300 Subject: Documentation: nfs: convert pnfs.txt to ReST Convert pnfs.txt to ReST. Content remains mostly unchanged. Signed-off-by: Daniel W. S. Almeida Link: https://lore.kernel.org/r/20200129044917.566906-2-dwlsalmeida@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/nfs/index.rst | 9 ++++ Documentation/filesystems/nfs/pnfs.rst | 78 +++++++++++++++++++++++++++++++++ Documentation/filesystems/nfs/pnfs.txt | 73 ------------------------------ 4 files changed, 88 insertions(+), 73 deletions(-) create mode 100644 Documentation/filesystems/nfs/index.rst create mode 100644 Documentation/filesystems/nfs/pnfs.rst delete mode 100644 Documentation/filesystems/nfs/pnfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 386eaad008b2..45d791905e91 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -51,3 +51,4 @@ Documentation for filesystem implementations. overlayfs virtiofs vfat + nfs/index diff --git a/Documentation/filesystems/nfs/index.rst b/Documentation/filesystems/nfs/index.rst new file mode 100644 index 000000000000..d19ba592779a --- /dev/null +++ b/Documentation/filesystems/nfs/index.rst @@ -0,0 +1,9 @@ +=============================== +NFS +=============================== + + +.. toctree:: + :maxdepth: 1 + + pnfs diff --git a/Documentation/filesystems/nfs/pnfs.rst b/Documentation/filesystems/nfs/pnfs.rst new file mode 100644 index 000000000000..7c470ecdc3a9 --- /dev/null +++ b/Documentation/filesystems/nfs/pnfs.rst @@ -0,0 +1,78 @@ +========================== +Reference counting in pnfs +========================== + +The are several inter-related caches. We have layouts which can +reference multiple devices, each of which can reference multiple data servers. +Each data server can be referenced by multiple devices. Each device +can be referenced by multiple layouts. To keep all of this straight, +we need to reference count. + + +struct pnfs_layout_hdr +====================== + +The on-the-wire command LAYOUTGET corresponds to struct +pnfs_layout_segment, usually referred to by the variable name lseg. +Each nfs_inode may hold a pointer to a cache of these layout +segments in nfsi->layout, of type struct pnfs_layout_hdr. + +We reference the header for the inode pointing to it, across each +outstanding RPC call that references it (LAYOUTGET, LAYOUTRETURN, +LAYOUTCOMMIT), and for each lseg held within. + +Each header is also (when non-empty) put on a list associated with +struct nfs_client (cl_layouts). Being put on this list does not bump +the reference count, as the layout is kept around by the lseg that +keeps it in the list. + +deviceid_cache +============== + +lsegs reference device ids, which are resolved per nfs_client and +layout driver type. The device ids are held in a RCU cache (struct +nfs4_deviceid_cache). The cache itself is referenced across each +mount. The entries (struct nfs4_deviceid) themselves are held across +the lifetime of each lseg referencing them. + +RCU is used because the deviceid is basically a write once, read many +data structure. The hlist size of 32 buckets needs better +justification, but seems reasonable given that we can have multiple +deviceid's per filesystem, and multiple filesystems per nfs_client. + +The hash code is copied from the nfsd code base. A discussion of +hashing and variations of this algorithm can be found `here. +`_ + +data server cache +================= + +file driver devices refer to data servers, which are kept in a module +level cache. Its reference is held over the lifetime of the deviceid +pointing to it. + +lseg +==== + +lseg maintains an extra reference corresponding to the NFS_LSEG_VALID +bit which holds it in the pnfs_layout_hdr's list. When the final lseg +is removed from the pnfs_layout_hdr's list, the NFS_LAYOUT_DESTROYED +bit is set, preventing any new lsegs from being added. + +layout drivers +============== + +PNFS utilizes what is called layout drivers. The STD defines 4 basic +layout types: "files", "objects", "blocks", and "flexfiles". For each +of these types there is a layout-driver with a common function-vectors +table which are called by the nfs-client pnfs-core to implement the +different layout types. + +Files-layout-driver code is in: fs/nfs/filelayout/.. directory +Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory +Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory + +blocks-layout setup +=================== + +TODO: Document the setup needs of the blocks layout driver diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt deleted file mode 100644 index 80dc0bdc302a..000000000000 --- a/Documentation/filesystems/nfs/pnfs.txt +++ /dev/null @@ -1,73 +0,0 @@ -Reference counting in pnfs: -========================== - -The are several inter-related caches. We have layouts which can -reference multiple devices, each of which can reference multiple data servers. -Each data server can be referenced by multiple devices. Each device -can be referenced by multiple layouts. To keep all of this straight, -we need to reference count. - - -struct pnfs_layout_hdr ----------------------- -The on-the-wire command LAYOUTGET corresponds to struct -pnfs_layout_segment, usually referred to by the variable name lseg. -Each nfs_inode may hold a pointer to a cache of these layout -segments in nfsi->layout, of type struct pnfs_layout_hdr. - -We reference the header for the inode pointing to it, across each -outstanding RPC call that references it (LAYOUTGET, LAYOUTRETURN, -LAYOUTCOMMIT), and for each lseg held within. - -Each header is also (when non-empty) put on a list associated with -struct nfs_client (cl_layouts). Being put on this list does not bump -the reference count, as the layout is kept around by the lseg that -keeps it in the list. - -deviceid_cache --------------- -lsegs reference device ids, which are resolved per nfs_client and -layout driver type. The device ids are held in a RCU cache (struct -nfs4_deviceid_cache). The cache itself is referenced across each -mount. The entries (struct nfs4_deviceid) themselves are held across -the lifetime of each lseg referencing them. - -RCU is used because the deviceid is basically a write once, read many -data structure. The hlist size of 32 buckets needs better -justification, but seems reasonable given that we can have multiple -deviceid's per filesystem, and multiple filesystems per nfs_client. - -The hash code is copied from the nfsd code base. A discussion of -hashing and variations of this algorithm can be found at: -http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809 - -data server cache ------------------ -file driver devices refer to data servers, which are kept in a module -level cache. Its reference is held over the lifetime of the deviceid -pointing to it. - -lseg ----- -lseg maintains an extra reference corresponding to the NFS_LSEG_VALID -bit which holds it in the pnfs_layout_hdr's list. When the final lseg -is removed from the pnfs_layout_hdr's list, the NFS_LAYOUT_DESTROYED -bit is set, preventing any new lsegs from being added. - -layout drivers --------------- - -PNFS utilizes what is called layout drivers. The STD defines 4 basic -layout types: "files", "objects", "blocks", and "flexfiles". For each -of these types there is a layout-driver with a common function-vectors -table which are called by the nfs-client pnfs-core to implement the -different layout types. - -Files-layout-driver code is in: fs/nfs/filelayout/.. directory -Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory -Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory - -blocks-layout setup -------------------- - -TODO: Document the setup needs of the blocks layout driver -- cgit v1.2.3 From f0bf8a988b26e75cc6fc28a44a745cb354a2b5a6 Mon Sep 17 00:00:00 2001 From: "Daniel W. S. Almeida" Date: Wed, 29 Jan 2020 01:49:14 -0300 Subject: Documentation: nfs: rpc-cache: convert to ReST Convert rpc-cache.txt to ReST. Changes aim to improve presentation but the content itself remains mostly the same. Signed-off-by: Daniel W. S. Almeida Link: https://lore.kernel.org/r/20200129044917.566906-3-dwlsalmeida@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/filesystems/nfs/index.rst | 1 + Documentation/filesystems/nfs/rpc-cache.rst | 220 ++++++++++++++++++++++++++++ Documentation/filesystems/nfs/rpc-cache.txt | 202 ------------------------- 3 files changed, 221 insertions(+), 202 deletions(-) create mode 100644 Documentation/filesystems/nfs/rpc-cache.rst delete mode 100644 Documentation/filesystems/nfs/rpc-cache.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/nfs/index.rst b/Documentation/filesystems/nfs/index.rst index d19ba592779a..52f4956e7770 100644 --- a/Documentation/filesystems/nfs/index.rst +++ b/Documentation/filesystems/nfs/index.rst @@ -7,3 +7,4 @@ NFS :maxdepth: 1 pnfs + rpc-cache diff --git a/Documentation/filesystems/nfs/rpc-cache.rst b/Documentation/filesystems/nfs/rpc-cache.rst new file mode 100644 index 000000000000..bb164eea969b --- /dev/null +++ b/Documentation/filesystems/nfs/rpc-cache.rst @@ -0,0 +1,220 @@ +========= +RPC Cache +========= + +This document gives a brief introduction to the caching +mechanisms in the sunrpc layer that is used, in particular, +for NFS authentication. + +Caches +====== + +The caching replaces the old exports table and allows for +a wide variety of values to be caches. + +There are a number of caches that are similar in structure though +quite possibly very different in content and use. There is a corpus +of common code for managing these caches. + +Examples of caches that are likely to be needed are: + + - mapping from IP address to client name + - mapping from client name and filesystem to export options + - mapping from UID to list of GIDs, to work around NFS's limitation + of 16 gids. + - mappings between local UID/GID and remote UID/GID for sites that + do not have uniform uid assignment + - mapping from network identify to public key for crypto authentication. + +The common code handles such things as: + + - general cache lookup with correct locking + - supporting 'NEGATIVE' as well as positive entries + - allowing an EXPIRED time on cache items, and removing + items after they expire, and are no longer in-use. + - making requests to user-space to fill in cache entries + - allowing user-space to directly set entries in the cache + - delaying RPC requests that depend on as-yet incomplete + cache entries, and replaying those requests when the cache entry + is complete. + - clean out old entries as they expire. + +Creating a Cache +---------------- + +- A cache needs a datum to store. This is in the form of a + structure definition that must contain a struct cache_head + as an element, usually the first. + It will also contain a key and some content. + Each cache element is reference counted and contains + expiry and update times for use in cache management. +- A cache needs a "cache_detail" structure that + describes the cache. This stores the hash table, some + parameters for cache management, and some operations detailing how + to work with particular cache items. + + The operations are: + + struct cache_head \*alloc(void) + This simply allocates appropriate memory and returns + a pointer to the cache_detail embedded within the + structure + + void cache_put(struct kref \*) + This is called when the last reference to an item is + dropped. The pointer passed is to the 'ref' field + in the cache_head. cache_put should release any + references create by 'cache_init' and, if CACHE_VALID + is set, any references created by cache_update. + It should then release the memory allocated by + 'alloc'. + + int match(struct cache_head \*orig, struct cache_head \*new) + test if the keys in the two structures match. Return + 1 if they do, 0 if they don't. + + void init(struct cache_head \*orig, struct cache_head \*new) + Set the 'key' fields in 'new' from 'orig'. This may + include taking references to shared objects. + + void update(struct cache_head \*orig, struct cache_head \*new) + Set the 'content' fileds in 'new' from 'orig'. + + int cache_show(struct seq_file \*m, struct cache_detail \*cd, struct cache_head \*h) + Optional. Used to provide a /proc file that lists the + contents of a cache. This should show one item, + usually on just one line. + + int cache_request(struct cache_detail \*cd, struct cache_head \*h, char \*\*bpp, int \*blen) + Format a request to be send to user-space for an item + to be instantiated. \*bpp is a buffer of size \*blen. + bpp should be moved forward over the encoded message, + and \*blen should be reduced to show how much free + space remains. Return 0 on success or <0 if not + enough room or other problem. + + int cache_parse(struct cache_detail \*cd, char \*buf, int len) + A message from user space has arrived to fill out a + cache entry. It is in 'buf' of length 'len'. + cache_parse should parse this, find the item in the + cache with sunrpc_cache_lookup_rcu, and update the item + with sunrpc_cache_update. + + +- A cache needs to be registered using cache_register(). This + includes it on a list of caches that will be regularly + cleaned to discard old data. + +Using a cache +------------- + +To find a value in a cache, call sunrpc_cache_lookup_rcu passing a pointer +to the cache_head in a sample item with the 'key' fields filled in. +This will be passed to ->match to identify the target entry. If no +entry is found, a new entry will be create, added to the cache, and +marked as not containing valid data. + +The item returned is typically passed to cache_check which will check +if the data is valid, and may initiate an up-call to get fresh data. +cache_check will return -ENOENT in the entry is negative or if an up +call is needed but not possible, -EAGAIN if an upcall is pending, +or 0 if the data is valid; + +cache_check can be passed a "struct cache_req\*". This structure is +typically embedded in the actual request and can be used to create a +deferred copy of the request (struct cache_deferred_req). This is +done when the found cache item is not uptodate, but the is reason to +believe that userspace might provide information soon. When the cache +item does become valid, the deferred copy of the request will be +revisited (->revisit). It is expected that this method will +reschedule the request for processing. + +The value returned by sunrpc_cache_lookup_rcu can also be passed to +sunrpc_cache_update to set the content for the item. A second item is +passed which should hold the content. If the item found by _lookup +has valid data, then it is discarded and a new item is created. This +saves any user of an item from worrying about content changing while +it is being inspected. If the item found by _lookup does not contain +valid data, then the content is copied across and CACHE_VALID is set. + +Populating a cache +------------------ + +Each cache has a name, and when the cache is registered, a directory +with that name is created in /proc/net/rpc + +This directory contains a file called 'channel' which is a channel +for communicating between kernel and user for populating the cache. +This directory may later contain other files of interacting +with the cache. + +The 'channel' works a bit like a datagram socket. Each 'write' is +passed as a whole to the cache for parsing and interpretation. +Each cache can treat the write requests differently, but it is +expected that a message written will contain: + + - a key + - an expiry time + - a content. + +with the intention that an item in the cache with the give key +should be create or updated to have the given content, and the +expiry time should be set on that item. + +Reading from a channel is a bit more interesting. When a cache +lookup fails, or when it succeeds but finds an entry that may soon +expire, a request is lodged for that cache item to be updated by +user-space. These requests appear in the channel file. + +Successive reads will return successive requests. +If there are no more requests to return, read will return EOF, but a +select or poll for read will block waiting for another request to be +added. + +Thus a user-space helper is likely to:: + + open the channel. + select for readable + read a request + write a response + loop. + +If it dies and needs to be restarted, any requests that have not been +answered will still appear in the file and will be read by the new +instance of the helper. + +Each cache should define a "cache_parse" method which takes a message +written from user-space and processes it. It should return an error +(which propagates back to the write syscall) or 0. + +Each cache should also define a "cache_request" method which +takes a cache item and encodes a request into the buffer +provided. + +.. note:: + If a cache has no active readers on the channel, and has had not + active readers for more than 60 seconds, further requests will not be + added to the channel but instead all lookups that do not find a valid + entry will fail. This is partly for backward compatibility: The + previous nfs exports table was deemed to be authoritative and a + failed lookup meant a definite 'no'. + +request/response format +----------------------- + +While each cache is free to use its own format for requests +and responses over channel, the following is recommended as +appropriate and support routines are available to help: +Each request or response record should be printable ASCII +with precisely one newline character which should be at the end. +Fields within the record should be separated by spaces, normally one. +If spaces, newlines, or nul characters are needed in a field they +much be quoted. two mechanisms are available: + +- If a field begins '\x' then it must contain an even number of + hex digits, and pairs of these digits provide the bytes in the + field. +- otherwise a \ in the field must be followed by 3 octal digits + which give the code for a byte. Other characters are treated + as them selves. At the very least, space, newline, nul, and + '\' must be quoted in this way. diff --git a/Documentation/filesystems/nfs/rpc-cache.txt b/Documentation/filesystems/nfs/rpc-cache.txt deleted file mode 100644 index c4dac829db0f..000000000000 --- a/Documentation/filesystems/nfs/rpc-cache.txt +++ /dev/null @@ -1,202 +0,0 @@ - This document gives a brief introduction to the caching -mechanisms in the sunrpc layer that is used, in particular, -for NFS authentication. - -CACHES -====== -The caching replaces the old exports table and allows for -a wide variety of values to be caches. - -There are a number of caches that are similar in structure though -quite possibly very different in content and use. There is a corpus -of common code for managing these caches. - -Examples of caches that are likely to be needed are: - - mapping from IP address to client name - - mapping from client name and filesystem to export options - - mapping from UID to list of GIDs, to work around NFS's limitation - of 16 gids. - - mappings between local UID/GID and remote UID/GID for sites that - do not have uniform uid assignment - - mapping from network identify to public key for crypto authentication. - -The common code handles such things as: - - general cache lookup with correct locking - - supporting 'NEGATIVE' as well as positive entries - - allowing an EXPIRED time on cache items, and removing - items after they expire, and are no longer in-use. - - making requests to user-space to fill in cache entries - - allowing user-space to directly set entries in the cache - - delaying RPC requests that depend on as-yet incomplete - cache entries, and replaying those requests when the cache entry - is complete. - - clean out old entries as they expire. - -Creating a Cache ----------------- - -1/ A cache needs a datum to store. This is in the form of a - structure definition that must contain a - struct cache_head - as an element, usually the first. - It will also contain a key and some content. - Each cache element is reference counted and contains - expiry and update times for use in cache management. -2/ A cache needs a "cache_detail" structure that - describes the cache. This stores the hash table, some - parameters for cache management, and some operations detailing how - to work with particular cache items. - The operations requires are: - struct cache_head *alloc(void) - This simply allocates appropriate memory and returns - a pointer to the cache_detail embedded within the - structure - void cache_put(struct kref *) - This is called when the last reference to an item is - dropped. The pointer passed is to the 'ref' field - in the cache_head. cache_put should release any - references create by 'cache_init' and, if CACHE_VALID - is set, any references created by cache_update. - It should then release the memory allocated by - 'alloc'. - int match(struct cache_head *orig, struct cache_head *new) - test if the keys in the two structures match. Return - 1 if they do, 0 if they don't. - void init(struct cache_head *orig, struct cache_head *new) - Set the 'key' fields in 'new' from 'orig'. This may - include taking references to shared objects. - void update(struct cache_head *orig, struct cache_head *new) - Set the 'content' fileds in 'new' from 'orig'. - int cache_show(struct seq_file *m, struct cache_detail *cd, - struct cache_head *h) - Optional. Used to provide a /proc file that lists the - contents of a cache. This should show one item, - usually on just one line. - int cache_request(struct cache_detail *cd, struct cache_head *h, - char **bpp, int *blen) - Format a request to be send to user-space for an item - to be instantiated. *bpp is a buffer of size *blen. - bpp should be moved forward over the encoded message, - and *blen should be reduced to show how much free - space remains. Return 0 on success or <0 if not - enough room or other problem. - int cache_parse(struct cache_detail *cd, char *buf, int len) - A message from user space has arrived to fill out a - cache entry. It is in 'buf' of length 'len'. - cache_parse should parse this, find the item in the - cache with sunrpc_cache_lookup_rcu, and update the item - with sunrpc_cache_update. - - -3/ A cache needs to be registered using cache_register(). This - includes it on a list of caches that will be regularly - cleaned to discard old data. - -Using a cache -------------- - -To find a value in a cache, call sunrpc_cache_lookup_rcu passing a pointer -to the cache_head in a sample item with the 'key' fields filled in. -This will be passed to ->match to identify the target entry. If no -entry is found, a new entry will be create, added to the cache, and -marked as not containing valid data. - -The item returned is typically passed to cache_check which will check -if the data is valid, and may initiate an up-call to get fresh data. -cache_check will return -ENOENT in the entry is negative or if an up -call is needed but not possible, -EAGAIN if an upcall is pending, -or 0 if the data is valid; - -cache_check can be passed a "struct cache_req *". This structure is -typically embedded in the actual request and can be used to create a -deferred copy of the request (struct cache_deferred_req). This is -done when the found cache item is not uptodate, but the is reason to -believe that userspace might provide information soon. When the cache -item does become valid, the deferred copy of the request will be -revisited (->revisit). It is expected that this method will -reschedule the request for processing. - -The value returned by sunrpc_cache_lookup_rcu can also be passed to -sunrpc_cache_update to set the content for the item. A second item is -passed which should hold the content. If the item found by _lookup -has valid data, then it is discarded and a new item is created. This -saves any user of an item from worrying about content changing while -it is being inspected. If the item found by _lookup does not contain -valid data, then the content is copied across and CACHE_VALID is set. - -Populating a cache ------------------- - -Each cache has a name, and when the cache is registered, a directory -with that name is created in /proc/net/rpc - -This directory contains a file called 'channel' which is a channel -for communicating between kernel and user for populating the cache. -This directory may later contain other files of interacting -with the cache. - -The 'channel' works a bit like a datagram socket. Each 'write' is -passed as a whole to the cache for parsing and interpretation. -Each cache can treat the write requests differently, but it is -expected that a message written will contain: - - a key - - an expiry time - - a content. -with the intention that an item in the cache with the give key -should be create or updated to have the given content, and the -expiry time should be set on that item. - -Reading from a channel is a bit more interesting. When a cache -lookup fails, or when it succeeds but finds an entry that may soon -expire, a request is lodged for that cache item to be updated by -user-space. These requests appear in the channel file. - -Successive reads will return successive requests. -If there are no more requests to return, read will return EOF, but a -select or poll for read will block waiting for another request to be -added. - -Thus a user-space helper is likely to: - open the channel. - select for readable - read a request - write a response - loop. - -If it dies and needs to be restarted, any requests that have not been -answered will still appear in the file and will be read by the new -instance of the helper. - -Each cache should define a "cache_parse" method which takes a message -written from user-space and processes it. It should return an error -(which propagates back to the write syscall) or 0. - -Each cache should also define a "cache_request" method which -takes a cache item and encodes a request into the buffer -provided. - -Note: If a cache has no active readers on the channel, and has had not -active readers for more than 60 seconds, further requests will not be -added to the channel but instead all lookups that do not find a valid -entry will fail. This is partly for backward compatibility: The -previous nfs exports table was deemed to be authoritative and a -failed lookup meant a definite 'no'. - -request/response format ------------------------ - -While each cache is free to use its own format for requests -and responses over channel, the following is recommended as -appropriate and support routines are available to help: -Each request or response record should be printable ASCII -with precisely one newline character which should be at the end. -Fields within the record should be separated by spaces, normally one. -If spaces, newlines, or nul characters are needed in a field they -much be quoted. two mechanisms are available: -1/ If a field begins '\x' then it must contain an even number of - hex digits, and pairs of these digits provide the bytes in the - field. -2/ otherwise a \ in the field must be followed by 3 octal digits - which give the code for a byte. Other characters are treated - as them selves. At the very least, space, newline, nul, and - '\' must be quoted in this way. -- cgit v1.2.3 From 250baf06aacf4eafb5641c86c91f2b1df4cf7d86 Mon Sep 17 00:00:00 2001 From: "Daniel W. S. Almeida" Date: Wed, 29 Jan 2020 01:49:15 -0300 Subject: Documentation: nfs: rpc-server-gss: convert to ReST Convert rpc-server-gss.txt to ReST. Content remains mostly unchanged. Signed-off-by: Daniel W. S. Almeida Link: https://lore.kernel.org/r/20200129044917.566906-4-dwlsalmeida@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/filesystems/nfs/index.rst | 1 + Documentation/filesystems/nfs/rpc-server-gss.rst | 94 ++++++++++++++++++++++++ Documentation/filesystems/nfs/rpc-server-gss.txt | 91 ----------------------- 3 files changed, 95 insertions(+), 91 deletions(-) create mode 100644 Documentation/filesystems/nfs/rpc-server-gss.rst delete mode 100644 Documentation/filesystems/nfs/rpc-server-gss.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/nfs/index.rst b/Documentation/filesystems/nfs/index.rst index 52f4956e7770..9d5365cbe2c3 100644 --- a/Documentation/filesystems/nfs/index.rst +++ b/Documentation/filesystems/nfs/index.rst @@ -8,3 +8,4 @@ NFS pnfs rpc-cache + rpc-server-gss diff --git a/Documentation/filesystems/nfs/rpc-server-gss.rst b/Documentation/filesystems/nfs/rpc-server-gss.rst new file mode 100644 index 000000000000..812754576845 --- /dev/null +++ b/Documentation/filesystems/nfs/rpc-server-gss.rst @@ -0,0 +1,94 @@ +========================================= +rpcsec_gss support for kernel RPC servers +========================================= + +This document gives references to the standards and protocols used to +implement RPCGSS authentication in kernel RPC servers such as the NFS +server and the NFS client's NFSv4.0 callback server. (But note that +NFSv4.1 and higher don't require the client to act as a server for the +purposes of authentication.) + +RPCGSS is specified in a few IETF documents: + + - RFC2203 v1: http://tools.ietf.org/rfc/rfc2203.txt + - RFC5403 v2: http://tools.ietf.org/rfc/rfc5403.txt + +and there is a 3rd version being proposed: + + - http://tools.ietf.org/id/draft-williams-rpcsecgssv3.txt + (At draft n. 02 at the time of writing) + +Background +========== + +The RPCGSS Authentication method describes a way to perform GSSAPI +Authentication for NFS. Although GSSAPI is itself completely mechanism +agnostic, in many cases only the KRB5 mechanism is supported by NFS +implementations. + +The Linux kernel, at the moment, supports only the KRB5 mechanism, and +depends on GSSAPI extensions that are KRB5 specific. + +GSSAPI is a complex library, and implementing it completely in kernel is +unwarranted. However GSSAPI operations are fundementally separable in 2 +parts: + +- initial context establishment +- integrity/privacy protection (signing and encrypting of individual + packets) + +The former is more complex and policy-independent, but less +performance-sensitive. The latter is simpler and needs to be very fast. + +Therefore, we perform per-packet integrity and privacy protection in the +kernel, but leave the initial context establishment to userspace. We +need upcalls to request userspace to perform context establishment. + +NFS Server Legacy Upcall Mechanism +================================== + +The classic upcall mechanism uses a custom text based upcall mechanism +to talk to a custom daemon called rpc.svcgssd that is provide by the +nfs-utils package. + +This upcall mechanism has 2 limitations: + +A) It can handle tokens that are no bigger than 2KiB + +In some Kerberos deployment GSSAPI tokens can be quite big, up and +beyond 64KiB in size due to various authorization extensions attacked to +the Kerberos tickets, that needs to be sent through the GSS layer in +order to perform context establishment. + +B) It does not properly handle creds where the user is member of more +than a few thousand groups (the current hard limit in the kernel is 65K +groups) due to limitation on the size of the buffer that can be send +back to the kernel (4KiB). + +NFS Server New RPC Upcall Mechanism +=================================== + +The newer upcall mechanism uses RPC over a unix socket to a daemon +called gss-proxy, implemented by a userspace program called Gssproxy. + +The gss_proxy RPC protocol is currently documented `here +`_. + +This upcall mechanism uses the kernel rpc client and connects to the gssproxy +userspace program over a regular unix socket. The gssproxy protocol does not +suffer from the size limitations of the legacy protocol. + +Negotiating Upcall Mechanisms +============================= + +To provide backward compatibility, the kernel defaults to using the +legacy mechanism. To switch to the new mechanism, gss-proxy must bind +to /var/run/gssproxy.sock and then write "1" to +/proc/net/rpc/use-gss-proxy. If gss-proxy dies, it must repeat both +steps. + +Once the upcall mechanism is chosen, it cannot be changed. To prevent +locking into the legacy mechanisms, the above steps must be performed +before starting nfsd. Whoever starts nfsd can guarantee this by reading +from /proc/net/rpc/use-gss-proxy and checking that it contains a +"1"--the read will block until gss-proxy has done its write to the file. diff --git a/Documentation/filesystems/nfs/rpc-server-gss.txt b/Documentation/filesystems/nfs/rpc-server-gss.txt deleted file mode 100644 index 310bbbaf9080..000000000000 --- a/Documentation/filesystems/nfs/rpc-server-gss.txt +++ /dev/null @@ -1,91 +0,0 @@ - -rpcsec_gss support for kernel RPC servers -========================================= - -This document gives references to the standards and protocols used to -implement RPCGSS authentication in kernel RPC servers such as the NFS -server and the NFS client's NFSv4.0 callback server. (But note that -NFSv4.1 and higher don't require the client to act as a server for the -purposes of authentication.) - -RPCGSS is specified in a few IETF documents: - - RFC2203 v1: http://tools.ietf.org/rfc/rfc2203.txt - - RFC5403 v2: http://tools.ietf.org/rfc/rfc5403.txt -and there is a 3rd version being proposed: - - http://tools.ietf.org/id/draft-williams-rpcsecgssv3.txt - (At draft n. 02 at the time of writing) - -Background ----------- - -The RPCGSS Authentication method describes a way to perform GSSAPI -Authentication for NFS. Although GSSAPI is itself completely mechanism -agnostic, in many cases only the KRB5 mechanism is supported by NFS -implementations. - -The Linux kernel, at the moment, supports only the KRB5 mechanism, and -depends on GSSAPI extensions that are KRB5 specific. - -GSSAPI is a complex library, and implementing it completely in kernel is -unwarranted. However GSSAPI operations are fundementally separable in 2 -parts: -- initial context establishment -- integrity/privacy protection (signing and encrypting of individual - packets) - -The former is more complex and policy-independent, but less -performance-sensitive. The latter is simpler and needs to be very fast. - -Therefore, we perform per-packet integrity and privacy protection in the -kernel, but leave the initial context establishment to userspace. We -need upcalls to request userspace to perform context establishment. - -NFS Server Legacy Upcall Mechanism ----------------------------------- - -The classic upcall mechanism uses a custom text based upcall mechanism -to talk to a custom daemon called rpc.svcgssd that is provide by the -nfs-utils package. - -This upcall mechanism has 2 limitations: - -A) It can handle tokens that are no bigger than 2KiB - -In some Kerberos deployment GSSAPI tokens can be quite big, up and -beyond 64KiB in size due to various authorization extensions attacked to -the Kerberos tickets, that needs to be sent through the GSS layer in -order to perform context establishment. - -B) It does not properly handle creds where the user is member of more -than a few thousand groups (the current hard limit in the kernel is 65K -groups) due to limitation on the size of the buffer that can be send -back to the kernel (4KiB). - -NFS Server New RPC Upcall Mechanism ------------------------------------ - -The newer upcall mechanism uses RPC over a unix socket to a daemon -called gss-proxy, implemented by a userspace program called Gssproxy. - -The gss_proxy RPC protocol is currently documented here: - - https://fedorahosted.org/gss-proxy/wiki/ProtocolDocumentation - -This upcall mechanism uses the kernel rpc client and connects to the gssproxy -userspace program over a regular unix socket. The gssproxy protocol does not -suffer from the size limitations of the legacy protocol. - -Negotiating Upcall Mechanisms ------------------------------ - -To provide backward compatibility, the kernel defaults to using the -legacy mechanism. To switch to the new mechanism, gss-proxy must bind -to /var/run/gssproxy.sock and then write "1" to -/proc/net/rpc/use-gss-proxy. If gss-proxy dies, it must repeat both -steps. - -Once the upcall mechanism is chosen, it cannot be changed. To prevent -locking into the legacy mechanisms, the above steps must be performed -before starting nfsd. Whoever starts nfsd can guarantee this by reading -from /proc/net/rpc/use-gss-proxy and checking that it contains a -"1"--the read will block until gss-proxy has done its write to the file. -- cgit v1.2.3 From 04f81fb08d067f79c59fe132929a9c81eb9cb74b Mon Sep 17 00:00:00 2001 From: "Daniel W. S. Almeida" Date: Wed, 29 Jan 2020 01:49:16 -0300 Subject: Documentation: nfs: nfs41-server: convert to ReST Convert nfs41-server.txt to ReST. ASCII tables were converted to ReST grid table format. Signed-off-by: Daniel W. S. Almeida Link: https://lore.kernel.org/r/20200129044917.566906-5-dwlsalmeida@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/filesystems/nfs/index.rst | 1 + Documentation/filesystems/nfs/nfs41-server.rst | 256 +++++++++++++++++++++++++ Documentation/filesystems/nfs/nfs41-server.txt | 173 ----------------- 3 files changed, 257 insertions(+), 173 deletions(-) create mode 100644 Documentation/filesystems/nfs/nfs41-server.rst delete mode 100644 Documentation/filesystems/nfs/nfs41-server.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/nfs/index.rst b/Documentation/filesystems/nfs/index.rst index 9d5365cbe2c3..a0a678af921b 100644 --- a/Documentation/filesystems/nfs/index.rst +++ b/Documentation/filesystems/nfs/index.rst @@ -9,3 +9,4 @@ NFS pnfs rpc-cache rpc-server-gss + nfs41-server diff --git a/Documentation/filesystems/nfs/nfs41-server.rst b/Documentation/filesystems/nfs/nfs41-server.rst new file mode 100644 index 000000000000..16b5f02f81c3 --- /dev/null +++ b/Documentation/filesystems/nfs/nfs41-server.rst @@ -0,0 +1,256 @@ +============================= +NFSv4.1 Server Implementation +============================= + +Server support for minorversion 1 can be controlled using the +/proc/fs/nfsd/versions control file. The string output returned +by reading this file will contain either "+4.1" or "-4.1" +correspondingly. + +Currently, server support for minorversion 1 is enabled by default. +It can be disabled at run time by writing the string "-4.1" to +the /proc/fs/nfsd/versions control file. Note that to write this +control file, the nfsd service must be taken down. You can use rpc.nfsd +for this; see rpc.nfsd(8). + +(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and +"-4", respectively. Therefore, code meant to work on both new and old +kernels must turn 4.1 on or off *before* turning support for version 4 +on or off; rpc.nfsd does this correctly.) + +The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based +on RFC 5661. + +From the many new features in NFSv4.1 the current implementation +focuses on the mandatory-to-implement NFSv4.1 Sessions, providing +"exactly once" semantics and better control and throttling of the +resources allocated for each client. + +The table below, taken from the NFSv4.1 document, lists +the operations that are mandatory to implement (REQ), optional +(OPT), and NFSv4.0 operations that are required not to implement (MNI) +in minor version 1. The first column indicates the operations that +are not supported yet by the linux server implementation. + +The OPTIONAL features identified and their abbreviations are as follows: + +- **pNFS** Parallel NFS +- **FDELG** File Delegations +- **DDELG** Directory Delegations + +The following abbreviations indicate the linux server implementation status. + +- **I** Implemented NFSv4.1 operations. +- **NS** Not Supported. +- **NS\*** Unimplemented optional feature. + +Operations +========== + ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| Implementation status | Operation | REQ,REC, OPT or NMI | Feature (REQ, REC or OPT) | Definition | ++=======================+======================+=====================+===========================+================+ +| | ACCESS | REQ | | Section 18.1 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | BACKCHANNEL_CTL | REQ | | Section 18.33 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | BIND_CONN_TO_SESSION | REQ | | Section 18.34 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | CLOSE | REQ | | Section 18.2 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | COMMIT | REQ | | Section 18.3 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | CREATE | REQ | | Section 18.4 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | CREATE_SESSION | REQ | | Section 18.36 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| NS* | DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | DELEGRETURN | OPT | FDELG, | Section 18.6 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | | | DDELG, pNFS | | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | | | (REQ) | | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | DESTROY_CLIENTID | REQ | | Section 18.50 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | DESTROY_SESSION | REQ | | Section 18.37 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | EXCHANGE_ID | REQ | | Section 18.35 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | FREE_STATEID | REQ | | Section 18.38 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | GETATTR | REQ | | Section 18.7 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| NS* | GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | GETFH | REQ | | Section 18.8 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| NS* | GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | LINK | OPT | | Section 18.9 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | LOCK | REQ | | Section 18.10 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | LOCKT | REQ | | Section 18.11 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | LOCKU | REQ | | Section 18.12 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | LOOKUP | REQ | | Section 18.13 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | LOOKUPP | REQ | | Section 18.14 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | NVERIFY | REQ | | Section 18.15 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | OPEN | REQ | | Section 18.16 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| NS* | OPENATTR | OPT | | Section 18.17 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | OPEN_CONFIRM | MNI | | N/A | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | OPEN_DOWNGRADE | REQ | | Section 18.18 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | PUTFH | REQ | | Section 18.19 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | PUTPUBFH | REQ | | Section 18.20 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | PUTROOTFH | REQ | | Section 18.21 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | READ | REQ | | Section 18.22 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | READDIR | REQ | | Section 18.23 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | READLINK | OPT | | Section 18.24 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | RECLAIM_COMPLETE | REQ | | Section 18.51 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | RELEASE_LOCKOWNER | MNI | | N/A | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | REMOVE | REQ | | Section 18.25 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | RENAME | REQ | | Section 18.26 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | RENEW | MNI | | N/A | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | RESTOREFH | REQ | | Section 18.27 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | SAVEFH | REQ | | Section 18.28 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | SECINFO | REQ | | Section 18.29 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | | | layout (REQ) | Section 13.12 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | SEQUENCE | REQ | | Section 18.46 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | SETATTR | REQ | | Section 18.30 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | SETCLIENTID | MNI | | N/A | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | SETCLIENTID_CONFIRM | MNI | | N/A | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| NS | SET_SSV | REQ | | Section 18.47 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| I | TEST_STATEID | REQ | | Section 18.48 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | VERIFY | REQ | | Section 18.31 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| NS* | WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ +| | WRITE | REQ | | Section 18.32 | ++-----------------------+----------------------+---------------------+---------------------------+----------------+ + + +Callback Operations +=================== ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| Implementation status | Operation | REQ,REC, OPT or NMI | Feature (REQ, REC or OPT) | Definition | ++=======================+=========================+=====================+===========================+===============+ +| | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| I | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| NS* | CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| NS* | CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| NS* | CB_NOTIFY_LOCK | OPT | | Section 20.11 | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| NS* | CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| | CB_RECALL | OPT | FDELG, | Section 20.2 | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| | | | DDELG, pNFS | | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| | | | (REQ) | | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| NS* | CB_RECALL_ANY | OPT | FDELG, | Section 20.6 | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| | | | DDELG, pNFS | | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| | | | (REQ) | | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| NS | CB_RECALL_SLOT | REQ | | Section 20.8 | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| NS* | CB_RECALLABLE_OBJ_AVAIL | OPT | DDELG, pNFS | Section 20.7 | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| | | | (REQ) | | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| I | CB_SEQUENCE | OPT | FDELG, | Section 20.9 | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| | | | DDELG, pNFS | | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| | | | (REQ) | | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| NS* | CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| | | | DDELG, pNFS | | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ +| | | | (REQ) | | ++-----------------------+-------------------------+---------------------+---------------------------+---------------+ + + +Implementation notes: +===================== + +SSV: + The spec claims this is mandatory, but we don't actually know of any + implementations, so we're ignoring it for now. The server returns + NFS4ERR_ENCR_ALG_UNSUPP on EXCHANGE_ID, which should be future-proof. + +GSS on the backchannel: + Again, theoretically required but not widely implemented (in + particular, the current Linux client doesn't request it). We return + NFS4ERR_ENCR_ALG_UNSUPP on CREATE_SESSION. + +DELEGPURGE: + mandatory only for servers that support CLAIM_DELEGATE_PREV and/or + CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that + persist across client reboots). Thus we need not implement this for + now. + +EXCHANGE_ID: + implementation ids are ignored + +CREATE_SESSION: + backchannel attributes are ignored + +SEQUENCE: + no support for dynamic slot table renegotiation (optional) + +Nonstandard compound limitations: + No support for a sessions fore channel RPC compound that requires both a + ca_maxrequestsize request and a ca_maxresponsesize reply, so we may + fail to live up to the promise we made in CREATE_SESSION fore channel + negotiation. + +See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues. diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt deleted file mode 100644 index 682a59fabe3f..000000000000 --- a/Documentation/filesystems/nfs/nfs41-server.txt +++ /dev/null @@ -1,173 +0,0 @@ -NFSv4.1 Server Implementation - -Server support for minorversion 1 can be controlled using the -/proc/fs/nfsd/versions control file. The string output returned -by reading this file will contain either "+4.1" or "-4.1" -correspondingly. - -Currently, server support for minorversion 1 is enabled by default. -It can be disabled at run time by writing the string "-4.1" to -the /proc/fs/nfsd/versions control file. Note that to write this -control file, the nfsd service must be taken down. You can use rpc.nfsd -for this; see rpc.nfsd(8). - -(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and -"-4", respectively. Therefore, code meant to work on both new and old -kernels must turn 4.1 on or off *before* turning support for version 4 -on or off; rpc.nfsd does this correctly.) - -The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based -on RFC 5661. - -From the many new features in NFSv4.1 the current implementation -focuses on the mandatory-to-implement NFSv4.1 Sessions, providing -"exactly once" semantics and better control and throttling of the -resources allocated for each client. - -The table below, taken from the NFSv4.1 document, lists -the operations that are mandatory to implement (REQ), optional -(OPT), and NFSv4.0 operations that are required not to implement (MNI) -in minor version 1. The first column indicates the operations that -are not supported yet by the linux server implementation. - -The OPTIONAL features identified and their abbreviations are as follows: - pNFS Parallel NFS - FDELG File Delegations - DDELG Directory Delegations - -The following abbreviations indicate the linux server implementation status. - I Implemented NFSv4.1 operations. - NS Not Supported. - NS* Unimplemented optional feature. - -Operations - - +----------------------+------------+--------------+----------------+ - | Operation | REQ, REC, | Feature | Definition | - | | OPT, or | (REQ, REC, | | - | | MNI | or OPT) | | - +----------------------+------------+--------------+----------------+ - | ACCESS | REQ | | Section 18.1 | -I | BACKCHANNEL_CTL | REQ | | Section 18.33 | -I | BIND_CONN_TO_SESSION | REQ | | Section 18.34 | - | CLOSE | REQ | | Section 18.2 | - | COMMIT | REQ | | Section 18.3 | - | CREATE | REQ | | Section 18.4 | -I | CREATE_SESSION | REQ | | Section 18.36 | -NS*| DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 | - | DELEGRETURN | OPT | FDELG, | Section 18.6 | - | | | DDELG, pNFS | | - | | | (REQ) | | -I | DESTROY_CLIENTID | REQ | | Section 18.50 | -I | DESTROY_SESSION | REQ | | Section 18.37 | -I | EXCHANGE_ID | REQ | | Section 18.35 | -I | FREE_STATEID | REQ | | Section 18.38 | - | GETATTR | REQ | | Section 18.7 | -I | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 | -NS*| GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 | - | GETFH | REQ | | Section 18.8 | -NS*| GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 | -I | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 | -I | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 | -I | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 | - | LINK | OPT | | Section 18.9 | - | LOCK | REQ | | Section 18.10 | - | LOCKT | REQ | | Section 18.11 | - | LOCKU | REQ | | Section 18.12 | - | LOOKUP | REQ | | Section 18.13 | - | LOOKUPP | REQ | | Section 18.14 | - | NVERIFY | REQ | | Section 18.15 | - | OPEN | REQ | | Section 18.16 | -NS*| OPENATTR | OPT | | Section 18.17 | - | OPEN_CONFIRM | MNI | | N/A | - | OPEN_DOWNGRADE | REQ | | Section 18.18 | - | PUTFH | REQ | | Section 18.19 | - | PUTPUBFH | REQ | | Section 18.20 | - | PUTROOTFH | REQ | | Section 18.21 | - | READ | REQ | | Section 18.22 | - | READDIR | REQ | | Section 18.23 | - | READLINK | OPT | | Section 18.24 | - | RECLAIM_COMPLETE | REQ | | Section 18.51 | - | RELEASE_LOCKOWNER | MNI | | N/A | - | REMOVE | REQ | | Section 18.25 | - | RENAME | REQ | | Section 18.26 | - | RENEW | MNI | | N/A | - | RESTOREFH | REQ | | Section 18.27 | - | SAVEFH | REQ | | Section 18.28 | - | SECINFO | REQ | | Section 18.29 | -I | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, | - | | | layout (REQ) | Section 13.12 | -I | SEQUENCE | REQ | | Section 18.46 | - | SETATTR | REQ | | Section 18.30 | - | SETCLIENTID | MNI | | N/A | - | SETCLIENTID_CONFIRM | MNI | | N/A | -NS | SET_SSV | REQ | | Section 18.47 | -I | TEST_STATEID | REQ | | Section 18.48 | - | VERIFY | REQ | | Section 18.31 | -NS*| WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 | - | WRITE | REQ | | Section 18.32 | - -Callback Operations - - +-------------------------+-----------+-------------+---------------+ - | Operation | REQ, REC, | Feature | Definition | - | | OPT, or | (REQ, REC, | | - | | MNI | or OPT) | | - +-------------------------+-----------+-------------+---------------+ - | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 | -I | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 | -NS*| CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 | -NS*| CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 | -NS*| CB_NOTIFY_LOCK | OPT | | Section 20.11 | -NS*| CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 | - | CB_RECALL | OPT | FDELG, | Section 20.2 | - | | | DDELG, pNFS | | - | | | (REQ) | | -NS*| CB_RECALL_ANY | OPT | FDELG, | Section 20.6 | - | | | DDELG, pNFS | | - | | | (REQ) | | -NS | CB_RECALL_SLOT | REQ | | Section 20.8 | -NS*| CB_RECALLABLE_OBJ_AVAIL | OPT | DDELG, pNFS | Section 20.7 | - | | | (REQ) | | -I | CB_SEQUENCE | OPT | FDELG, | Section 20.9 | - | | | DDELG, pNFS | | - | | | (REQ) | | -NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 | - | | | DDELG, pNFS | | - | | | (REQ) | | - +-------------------------+-----------+-------------+---------------+ - -Implementation notes: - -SSV: -* The spec claims this is mandatory, but we don't actually know of any - implementations, so we're ignoring it for now. The server returns - NFS4ERR_ENCR_ALG_UNSUPP on EXCHANGE_ID, which should be future-proof. - -GSS on the backchannel: -* Again, theoretically required but not widely implemented (in - particular, the current Linux client doesn't request it). We return - NFS4ERR_ENCR_ALG_UNSUPP on CREATE_SESSION. - -DELEGPURGE: -* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or - CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that - persist across client reboots). Thus we need not implement this for - now. - -EXCHANGE_ID: -* implementation ids are ignored - -CREATE_SESSION: -* backchannel attributes are ignored - -SEQUENCE: -* no support for dynamic slot table renegotiation (optional) - -Nonstandard compound limitations: -* No support for a sessions fore channel RPC compound that requires both a - ca_maxrequestsize request and a ca_maxresponsesize reply, so we may - fail to live up to the promise we made in CREATE_SESSION fore channel - negotiation. - -See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues. -- cgit v1.2.3 From cb63032b1233e03ac20fc2b60820a50d605b9bc0 Mon Sep 17 00:00:00 2001 From: "Daniel W. S. Almeida" Date: Wed, 29 Jan 2020 01:49:17 -0300 Subject: Documentation: nfs: knfsd-stats: convert to ReST Convert knfsd-stats.txt to ReST. Content remains mostly the same. Signed-off-by: Daniel W. S. Almeida Link: https://lore.kernel.org/r/20200129044917.566906-6-dwlsalmeida@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/filesystems/nfs/index.rst | 1 + Documentation/filesystems/nfs/knfsd-stats.rst | 122 +++++++++++++++++++++++++ Documentation/filesystems/nfs/knfsd-stats.txt | 123 -------------------------- 3 files changed, 123 insertions(+), 123 deletions(-) create mode 100644 Documentation/filesystems/nfs/knfsd-stats.rst delete mode 100644 Documentation/filesystems/nfs/knfsd-stats.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/nfs/index.rst b/Documentation/filesystems/nfs/index.rst index a0a678af921b..65805624e39b 100644 --- a/Documentation/filesystems/nfs/index.rst +++ b/Documentation/filesystems/nfs/index.rst @@ -10,3 +10,4 @@ NFS rpc-cache rpc-server-gss nfs41-server + knfsd-stats diff --git a/Documentation/filesystems/nfs/knfsd-stats.rst b/Documentation/filesystems/nfs/knfsd-stats.rst new file mode 100644 index 000000000000..80bcf13550de --- /dev/null +++ b/Documentation/filesystems/nfs/knfsd-stats.rst @@ -0,0 +1,122 @@ +============================ +Kernel NFS Server Statistics +============================ + +:Authors: Greg Banks - 26 Mar 2009 + +This document describes the format and semantics of the statistics +which the kernel NFS server makes available to userspace. These +statistics are available in several text form pseudo files, each of +which is described separately below. + +In most cases you don't need to know these formats, as the nfsstat(8) +program from the nfs-utils distribution provides a helpful command-line +interface for extracting and printing them. + +All the files described here are formatted as a sequence of text lines, +separated by newline '\n' characters. Lines beginning with a hash +'#' character are comments intended for humans and should be ignored +by parsing routines. All other lines contain a sequence of fields +separated by whitespace. + +/proc/fs/nfsd/pool_stats +======================== + +This file is available in kernels from 2.6.30 onwards, if the +/proc/fs/nfsd filesystem is mounted (it almost always should be). + +The first line is a comment which describes the fields present in +all the other lines. The other lines present the following data as +a sequence of unsigned decimal numeric fields. One line is shown +for each NFS thread pool. + +All counters are 64 bits wide and wrap naturally. There is no way +to zero these counters, instead applications should do their own +rate conversion. + +pool + The id number of the NFS thread pool to which this line applies. + This number does not change. + + Thread pool ids are a contiguous set of small integers starting + at zero. The maximum value depends on the thread pool mode, but + currently cannot be larger than the number of CPUs in the system. + Note that in the default case there will be a single thread pool + which contains all the nfsd threads and all the CPUs in the system, + and thus this file will have a single line with a pool id of "0". + +packets-arrived + Counts how many NFS packets have arrived. More precisely, this + is the number of times that the network stack has notified the + sunrpc server layer that new data may be available on a transport + (e.g. an NFS or UDP socket or an NFS/RDMA endpoint). + + Depending on the NFS workload patterns and various network stack + effects (such as Large Receive Offload) which can combine packets + on the wire, this may be either more or less than the number + of NFS calls received (which statistic is available elsewhere). + However this is a more accurate and less workload-dependent measure + of how much CPU load is being placed on the sunrpc server layer + due to NFS network traffic. + +sockets-enqueued + Counts how many times an NFS transport is enqueued to wait for + an nfsd thread to service it, i.e. no nfsd thread was considered + available. + + The circumstance this statistic tracks indicates that there was NFS + network-facing work to be done but it couldn't be done immediately, + thus introducing a small delay in servicing NFS calls. The ideal + rate of change for this counter is zero; significantly non-zero + values may indicate a performance limitation. + + This can happen because there are too few nfsd threads in the thread + pool for the NFS workload (the workload is thread-limited), in which + case configuring more nfsd threads will probably improve the + performance of the NFS workload. + +threads-woken + Counts how many times an idle nfsd thread is woken to try to + receive some data from an NFS transport. + + This statistic tracks the circumstance where incoming + network-facing NFS work is being handled quickly, which is a good + thing. The ideal rate of change for this counter will be close + to but less than the rate of change of the packets-arrived counter. + +threads-timedout + Counts how many times an nfsd thread triggered an idle timeout, + i.e. was not woken to handle any incoming network packets for + some time. + + This statistic counts a circumstance where there are more nfsd + threads configured than can be used by the NFS workload. This is + a clue that the number of nfsd threads can be reduced without + affecting performance. Unfortunately, it's only a clue and not + a strong indication, for a couple of reasons: + + - Currently the rate at which the counter is incremented is quite + slow; the idle timeout is 60 minutes. Unless the NFS workload + remains constant for hours at a time, this counter is unlikely + to be providing information that is still useful. + + - It is usually a wise policy to provide some slack, + i.e. configure a few more nfsds than are currently needed, + to allow for future spikes in load. + + +Note that incoming packets on NFS transports will be dealt with in +one of three ways. An nfsd thread can be woken (threads-woken counts +this case), or the transport can be enqueued for later attention +(sockets-enqueued counts this case), or the packet can be temporarily +deferred because the transport is currently being used by an nfsd +thread. This last case is not very interesting and is not explicitly +counted, but can be inferred from the other counters thus:: + + packets-deferred = packets-arrived - ( sockets-enqueued + threads-woken ) + + +More +==== + +Descriptions of the other statistics file should go here. diff --git a/Documentation/filesystems/nfs/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.txt deleted file mode 100644 index 1a5d82180b84..000000000000 --- a/Documentation/filesystems/nfs/knfsd-stats.txt +++ /dev/null @@ -1,123 +0,0 @@ - -Kernel NFS Server Statistics -============================ - -This document describes the format and semantics of the statistics -which the kernel NFS server makes available to userspace. These -statistics are available in several text form pseudo files, each of -which is described separately below. - -In most cases you don't need to know these formats, as the nfsstat(8) -program from the nfs-utils distribution provides a helpful command-line -interface for extracting and printing them. - -All the files described here are formatted as a sequence of text lines, -separated by newline '\n' characters. Lines beginning with a hash -'#' character are comments intended for humans and should be ignored -by parsing routines. All other lines contain a sequence of fields -separated by whitespace. - -/proc/fs/nfsd/pool_stats ------------------------- - -This file is available in kernels from 2.6.30 onwards, if the -/proc/fs/nfsd filesystem is mounted (it almost always should be). - -The first line is a comment which describes the fields present in -all the other lines. The other lines present the following data as -a sequence of unsigned decimal numeric fields. One line is shown -for each NFS thread pool. - -All counters are 64 bits wide and wrap naturally. There is no way -to zero these counters, instead applications should do their own -rate conversion. - -pool - The id number of the NFS thread pool to which this line applies. - This number does not change. - - Thread pool ids are a contiguous set of small integers starting - at zero. The maximum value depends on the thread pool mode, but - currently cannot be larger than the number of CPUs in the system. - Note that in the default case there will be a single thread pool - which contains all the nfsd threads and all the CPUs in the system, - and thus this file will have a single line with a pool id of "0". - -packets-arrived - Counts how many NFS packets have arrived. More precisely, this - is the number of times that the network stack has notified the - sunrpc server layer that new data may be available on a transport - (e.g. an NFS or UDP socket or an NFS/RDMA endpoint). - - Depending on the NFS workload patterns and various network stack - effects (such as Large Receive Offload) which can combine packets - on the wire, this may be either more or less than the number - of NFS calls received (which statistic is available elsewhere). - However this is a more accurate and less workload-dependent measure - of how much CPU load is being placed on the sunrpc server layer - due to NFS network traffic. - -sockets-enqueued - Counts how many times an NFS transport is enqueued to wait for - an nfsd thread to service it, i.e. no nfsd thread was considered - available. - - The circumstance this statistic tracks indicates that there was NFS - network-facing work to be done but it couldn't be done immediately, - thus introducing a small delay in servicing NFS calls. The ideal - rate of change for this counter is zero; significantly non-zero - values may indicate a performance limitation. - - This can happen because there are too few nfsd threads in the thread - pool for the NFS workload (the workload is thread-limited), in which - case configuring more nfsd threads will probably improve the - performance of the NFS workload. - -threads-woken - Counts how many times an idle nfsd thread is woken to try to - receive some data from an NFS transport. - - This statistic tracks the circumstance where incoming - network-facing NFS work is being handled quickly, which is a good - thing. The ideal rate of change for this counter will be close - to but less than the rate of change of the packets-arrived counter. - -threads-timedout - Counts how many times an nfsd thread triggered an idle timeout, - i.e. was not woken to handle any incoming network packets for - some time. - - This statistic counts a circumstance where there are more nfsd - threads configured than can be used by the NFS workload. This is - a clue that the number of nfsd threads can be reduced without - affecting performance. Unfortunately, it's only a clue and not - a strong indication, for a couple of reasons: - - - Currently the rate at which the counter is incremented is quite - slow; the idle timeout is 60 minutes. Unless the NFS workload - remains constant for hours at a time, this counter is unlikely - to be providing information that is still useful. - - - It is usually a wise policy to provide some slack, - i.e. configure a few more nfsds than are currently needed, - to allow for future spikes in load. - - -Note that incoming packets on NFS transports will be dealt with in -one of three ways. An nfsd thread can be woken (threads-woken counts -this case), or the transport can be enqueued for later attention -(sockets-enqueued counts this case), or the packet can be temporarily -deferred because the transport is currently being used by an nfsd -thread. This last case is not very interesting and is not explicitly -counted, but can be inferred from the other counters thus: - -packets-deferred = packets-arrived - ( sockets-enqueued + threads-woken ) - - -More ----- -Descriptions of the other statistics file should go here. - - -Greg Banks -26 Mar 2009 -- cgit v1.2.3 From 56e6b3b0b381abd0484802828764d01552ff76ab Mon Sep 17 00:00:00 2001 From: Yue Hu Date: Thu, 6 Feb 2020 19:10:31 +0800 Subject: Documentation: zram: fix the description about orig_data_size of mm_stat orig_data_size counted the same_pages by commit 51f9f82c855d ("zram: count same page write as page_stored"), so let's fix it. Signed-off-by: Yue Hu Link: https://lore.kernel.org/r/20200206111031.9524-1-zbestahu@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/blockdev/zram.rst | 2 -- 1 file changed, 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst index 27c77d853028..a6fd1f9b5faf 100644 --- a/Documentation/admin-guide/blockdev/zram.rst +++ b/Documentation/admin-guide/blockdev/zram.rst @@ -251,8 +251,6 @@ line of text and contains the following stats separated by whitespace: ================ ============================================================= orig_data_size uncompressed size of data stored in this disk. - This excludes same-element-filled pages (same_pages) since - no memory is allocated for them. Unit: bytes compr_data_size compressed size of data stored in this disk mem_used_total the amount of memory allocated for this disk. This -- cgit v1.2.3 From 895f2c20a88a343d12c387dab9d785ff665cb4ac Mon Sep 17 00:00:00 2001 From: "d.hatayama@fujitsu.com" Date: Thu, 13 Feb 2020 02:51:49 +0000 Subject: docs: admin-guide: Add description of %c corename format There is somehow no description of %c corename format specifier for /proc/sys/kernel/core_pattern. The %c corename format specifier is used by user-space application such as systemd-coredump, so it should be documented. To find where %c is handled in the kernel source code, look at function format_corename() in fs/coredump.c. Signed-off-by: HATAYAMA Daisuke Link: https://lore.kernel.org/r/TYAPR01MB4014714BB2ACE425BB6EC6B7951A0@TYAPR01MB4014.jpnprd01.prod.outlook.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index def074807cee..b08ba4e63291 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -213,6 +213,7 @@ core_pattern is used to specify a core dumpfile pattern name. %h hostname %e executable filename (may be shortened) %E executable path + %c maximum size of core file by resource limit RLIMIT_CORE % both are dropped * If the first character of the pattern is a '|', the kernel will treat -- cgit v1.2.3 From 3b82a112ce594889742164b242d8f213938a443f Mon Sep 17 00:00:00 2001 From: Wang Long Date: Fri, 7 Feb 2020 21:42:10 +0800 Subject: Documentation/ABI: move sysfs-kernel-uids to removed directory commit 7c9414385ebf ("sched: Remove USER_SCHED") deleted the USER_SCHED feature. so move the ABI doc to removed directory. Signed-off-by: Wang Long Link: https://lore.kernel.org/r/1581082930-30441-1-git-send-email-w@laoqinren.net Signed-off-by: Jonathan Corbet --- Documentation/ABI/removed/sysfs-kernel-uids | 14 ++++++++++++++ Documentation/ABI/testing/sysfs-kernel-uids | 14 -------------- 2 files changed, 14 insertions(+), 14 deletions(-) create mode 100644 Documentation/ABI/removed/sysfs-kernel-uids delete mode 100644 Documentation/ABI/testing/sysfs-kernel-uids (limited to 'Documentation') diff --git a/Documentation/ABI/removed/sysfs-kernel-uids b/Documentation/ABI/removed/sysfs-kernel-uids new file mode 100644 index 000000000000..dc4463f190a7 --- /dev/null +++ b/Documentation/ABI/removed/sysfs-kernel-uids @@ -0,0 +1,14 @@ +What: /sys/kernel/uids//cpu_shares +Date: December 2007, finally removed in kernel v2.6.34-rc1 +Contact: Dhaval Giani + Srivatsa Vaddagiri +Description: + The /sys/kernel/uids//cpu_shares tunable is used + to set the cpu bandwidth a user is allowed. This is a + propotional value. What that means is that if there + are two users logged in, each with an equal number of + shares, then they will get equal CPU bandwidth. Another + example would be, if User A has shares = 1024 and user + B has shares = 2048, User B will get twice the CPU + bandwidth user A will. For more details refer + Documentation/scheduler/sched-design-CFS.rst diff --git a/Documentation/ABI/testing/sysfs-kernel-uids b/Documentation/ABI/testing/sysfs-kernel-uids deleted file mode 100644 index 4182b7061816..000000000000 --- a/Documentation/ABI/testing/sysfs-kernel-uids +++ /dev/null @@ -1,14 +0,0 @@ -What: /sys/kernel/uids//cpu_shares -Date: December 2007 -Contact: Dhaval Giani - Srivatsa Vaddagiri -Description: - The /sys/kernel/uids//cpu_shares tunable is used - to set the cpu bandwidth a user is allowed. This is a - propotional value. What that means is that if there - are two users logged in, each with an equal number of - shares, then they will get equal CPU bandwidth. Another - example would be, if User A has shares = 1024 and user - B has shares = 2048, User B will get twice the CPU - bandwidth user A will. For more details refer - Documentation/scheduler/sched-design-CFS.rst -- cgit v1.2.3 From 473da2f0d80aa7240dd0a2be5015fdfd93543ca2 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 9 Feb 2020 21:33:04 +0100 Subject: docs: userspace: ioctl-number: remove mc146818rtc conflict In 2.3.43pre2, the RTC ioctls definitions were actually moved from linux/mc146818rtc.h to linux/rtc.h Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20200209203304.66004-1-alexandre.belloni@bootlin.com Signed-off-by: Jonathan Corbet --- Documentation/userspace-api/ioctl/ioctl-number.rst | 1 - 1 file changed, 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 2e91370dc159..f759edafd938 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -266,7 +266,6 @@ Code Seq# Include File Comments 'o' 01-A1 `linux/dvb/*.h` DVB 'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this) 'p' 00-1F linux/rtc.h conflict! -'p' 00-3F linux/mc146818rtc.h conflict! 'p' 40-7F linux/nvram.h 'p' 80-9F linux/ppdev.h user-space parport -- cgit v1.2.3 From 2e5b1886e9bab6c29c5e5c3ce4e373bb9e9eaa8b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 9 Feb 2020 19:53:17 -0800 Subject: Documentation: bootconfig: fix Sphinx block warning Fix Sphinx format warning: lnx-56-rc1/Documentation/admin-guide/bootconfig.rst:26: WARNING: Literal block expected; none found. Signed-off-by: Randy Dunlap Cc: Steven Rostedt Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/07b3e31f-9b1e-1876-aa60-4436e4dd6da0@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/bootconfig.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index b342a6796392..e603ebb5bdda 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -23,7 +23,7 @@ of dot-connected-words, and key and value are connected by ``=``. The value has to be terminated by semi-colon (``;``) or newline (``\n``). For array value, array entries are separated by comma (``,``). :: -KEY[.WORD[...]] = VALUE[, VALUE2[...]][;] + KEY[.WORD[...]] = VALUE[, VALUE2[...]][;] Unlike the kernel command line syntax, spaces are OK around the comma and ``=``. -- cgit v1.2.3 From 06b72824386795bf6f0a6ac0f0cfef6b7f0165c1 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 13 Feb 2020 16:51:53 +0100 Subject: ASoC: meson: aiu: add audio output dt-bindings Add the dt-bindings and documentation of the AIU audio controller. This component provides most of the audio outputs found on the Amlogic Gx SoC family. Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20200213155159.3235792-4-jbrunet@baylibre.com Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/amlogic,aiu.yaml | 111 +++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/amlogic,aiu.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/sound/amlogic,aiu.yaml b/Documentation/devicetree/bindings/sound/amlogic,aiu.yaml new file mode 100644 index 000000000000..3ef7632dcb59 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/amlogic,aiu.yaml @@ -0,0 +1,111 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sound/amlogic,aiu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Amlogic AIU audio output controller + +maintainers: + - Jerome Brunet + +properties: + $nodename: + pattern: "^audio-controller@.*" + + "#sound-dai-cells": + const: 2 + + compatible: + items: + - enum: + - amlogic,aiu-gxbb + - amlogic,aiu-gxl + - const: + amlogic,aiu + + clocks: + items: + - description: AIU peripheral clock + - description: I2S peripheral clock + - description: I2S output clock + - description: I2S master clock + - description: I2S mixer clock + - description: SPDIF peripheral clock + - description: SPDIF output clock + - description: SPDIF master clock + - description: SPDIF master clock multiplexer + + clock-names: + items: + - const: pclk + - const: i2s_pclk + - const: i2s_aoclk + - const: i2s_mclk + - const: i2s_mixer + - const: spdif_pclk + - const: spdif_aoclk + - const: spdif_mclk + - const: spdif_mclk_sel + + interrupts: + items: + - description: I2S interrupt line + - description: SPDIF interrupt line + + interrupt-names: + items: + - const: i2s + - const: spdif + + reg: + maxItems: 1 + + resets: + maxItems: 1 + +required: + - "#sound-dai-cells" + - compatible + - clocks + - clock-names + - interrupts + - interrupt-names + - reg + - resets + +examples: + - | + #include + #include + #include + #include + + aiu: audio-controller@5400 { + compatible = "amlogic,aiu-gxl", "amlogic,aiu"; + #sound-dai-cells = <2>; + reg = <0x0 0x5400 0x0 0x2ac>; + interrupts = , + ; + interrupt-names = "i2s", "spdif"; + clocks = <&clkc CLKID_AIU_GLUE>, + <&clkc CLKID_I2S_OUT>, + <&clkc CLKID_AOCLK_GATE>, + <&clkc CLKID_CTS_AMCLK>, + <&clkc CLKID_MIXER_IFACE>, + <&clkc CLKID_IEC958>, + <&clkc CLKID_IEC958_GATE>, + <&clkc CLKID_CTS_MCLK_I958>, + <&clkc CLKID_CTS_I958>; + clock-names = "pclk", + "i2s_pclk", + "i2s_aoclk", + "i2s_mclk", + "i2s_mixer", + "spdif_pclk", + "spdif_aoclk", + "spdif_mclk", + "spdif_mclk_sel"; + resets = <&reset RESET_AIU>; + }; + -- cgit v1.2.3 From fd00366b8e4125d29e32d49053a702ddf77430f6 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 13 Feb 2020 16:51:58 +0100 Subject: ASoC: meson: gx: add sound card dt-binding documentation Add the dt-binding documentation of sound card supporting the amlogic GX SoC family Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20200213155159.3235792-9-jbrunet@baylibre.com Signed-off-by: Mark Brown --- .../bindings/sound/amlogic,gx-sound-card.yaml | 113 +++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml b/Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml new file mode 100644 index 000000000000..fb374c659be1 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sound/amlogic,gx-sound-card.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Amlogic GX sound card + +maintainers: + - Jerome Brunet + +properties: + compatible: + items: + - const: amlogic,gx-sound-card + + audio-aux-devs: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: list of auxiliary devices + + audio-routing: + $ref: /schemas/types.yaml#/definitions/non-unique-string-array + minItems: 2 + description: |- + A list of the connections between audio components. Each entry is a + pair of strings, the first being the connection's sink, the second + being the connection's source. + + audio-widgets: + $ref: /schemas/types.yaml#/definitions/non-unique-string-array + minItems: 2 + description: |- + A list off component DAPM widget. Each entry is a pair of strings, + the first being the widget type, the second being the widget name + + model: + $ref: /schemas/types.yaml#/definitions/string + description: User specified audio sound card name + +patternProperties: + "^dai-link-[0-9]+$": + type: object + description: |- + dai-link child nodes: + Container for dai-link level properties and the CODEC sub-nodes. + There should be at least one (and probably more) subnode of this type + + properties: + dai-format: + $ref: /schemas/types.yaml#/definitions/string + enum: [ i2s, left-j, dsp_a ] + + mclk-fs: + $ref: /schemas/types.yaml#/definitions/uint32 + description: |- + Multiplication factor between the frame rate and master clock + rate + + sound-dai: + $ref: /schemas/types.yaml#/definitions/phandle + description: phandle of the CPU DAI + + patternProperties: + "^codec-[0-9]+$": + type: object + description: |- + Codecs: + dai-link representing backend links should have at least one subnode. + One subnode for each codec of the dai-link. dai-link representing + frontend links have no codec, therefore have no subnodes + + properties: + sound-dai: + $ref: /schemas/types.yaml#/definitions/phandle + description: phandle of the codec DAI + + required: + - sound-dai + + required: + - sound-dai + +required: + - model + - dai-link-0 + +examples: + - | + sound { + compatible = "amlogic,gx-sound-card"; + model = "GXL-ACME-S905X-FOO"; + audio-aux-devs = <&>; + audio-routing = "I2S ENCODER I2S IN", "I2S FIFO Playback"; + + dai-link-0 { + sound-dai = <&i2s_fifo>; + }; + + dai-link-1 { + sound-dai = <&i2s_encoder>; + dai-format = "i2s"; + mclk-fs = <256>; + + codec-0 { + sound-dai = <&codec0>; + }; + + codec-1 { + sound-dai = <&codec1>; + }; + }; + }; + -- cgit v1.2.3 From 1a874afbbf8812af4c4e269eb3d2c6b201532ed0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 27 Nov 2019 16:46:34 +0200 Subject: dt-bindings: net: wireless: mt76: introduce big-endian property Introduce big-endian property to specify mtd radio partition endianness Signed-off-by: Lorenzo Bianconi Acked-by: Rob Herring Signed-off-by: Felix Fietkau --- Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt index 7e675dafc256..d4d982f7ab37 100644 --- a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt +++ b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt @@ -15,6 +15,8 @@ Optional properties: - ieee80211-freq-limit: See ieee80211.txt - mediatek,mtd-eeprom: Specify a MTD partition + offset containing EEPROM data +- big-endian: if the radio eeprom partition is written in big-endian, specify + this property The MAC address can as well be set with corresponding optional properties defined in net/ethernet.txt. @@ -31,6 +33,7 @@ Optional nodes: reg = <0x0000 0 0 0 0>; ieee80211-freq-limit = <5000000 6000000>; mediatek,mtd-eeprom = <&factory 0x8000>; + big-endian; led { led-sources = <2>; -- cgit v1.2.3 From cd82b0e0ca712e11a831004cda038a246f25cd68 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 30 Jan 2020 13:22:25 +0100 Subject: dt-bindings: net: wireless: mt76: document bindings for MT7622 MT7622 is a SoC that includes a 2.4 GHz 4x4 802.11n WMAC. Its feature set is comparable to a MT7615 chip, but limited to 2.4 GHz. Reviewed-by: Rob Herring Signed-off-by: Felix Fietkau --- .../bindings/net/wireless/mediatek,mt76.txt | 26 +++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt index d4d982f7ab37..3a76d8faaaed 100644 --- a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt +++ b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt @@ -4,13 +4,21 @@ This node provides properties for configuring the MediaTek mt76xx wireless device. The node is expected to be specified as a child node of the PCI controller to which the wireless chip is connected. -Alternatively, it can specify the wireless part of the MT7628/MT7688 SoC. -For SoC, use the compatible string "mediatek,mt7628-wmac" and the following -properties: +Alternatively, it can specify the wireless part of the MT7628/MT7688 or +MT7622 SoC. For SoC, use the following compatible strings: + +compatible: +- "mediatek,mt7628-wmac" for MT7628/MT7688 +- "mediatek,mt7622-wmac" for MT7622 +properties: - reg: Address and length of the register set for the device. - interrupts: Main device interrupt +MT7622 specific properties: +- power-domains: phandle to the power domain that the WMAC is part of +- mediatek,infracfg: phandle to the infrastructure bus fabric syscon node + Optional properties: - ieee80211-freq-limit: See ieee80211.txt @@ -53,3 +61,15 @@ wmac: wmac@10300000 { mediatek,mtd-eeprom = <&factory 0x0000>; }; + +MT7622 example: + +wmac: wmac@18000000 { + compatible = "mediatek,mt7622-wmac"; + reg = <0 0x18000000 0 0x100000>; + interrupts = ; + + mediatek,infracfg = <&infracfg>; + + power-domains = <&scpsys MT7622_POWER_DOMAIN_WB>; +}; -- cgit v1.2.3 From b8e6e27c626e60bb5708ce8c974b58c485c32ac7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Feb 2020 00:36:35 +0100 Subject: Documentation: PM: QoS: Update to reflect previous code changes Update the PM QoS documentation to reflect the previous code changes regarding the removal of PM QoS classes and the CPU latency QoS API rework. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria --- Documentation/admin-guide/pm/cpuidle.rst | 73 +++++++++++++-------------- Documentation/power/pm_qos_interface.rst | 86 +++++++++++++++----------------- 2 files changed, 75 insertions(+), 84 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index 6a06dc473dd6..5605cc6f9560 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -583,20 +583,17 @@ Power Management Quality of Service for CPUs The power management quality of service (PM QoS) framework in the Linux kernel allows kernel code and user space processes to set constraints on various energy-efficiency features of the kernel to prevent performance from dropping -below a required level. The PM QoS constraints can be set globally, in -predefined categories referred to as PM QoS classes, or against individual -devices. +below a required level. CPU idle time management can be affected by PM QoS in two ways, through the -global constraint in the ``PM_QOS_CPU_DMA_LATENCY`` class and through the -resume latency constraints for individual CPUs. Kernel code (e.g. device -drivers) can set both of them with the help of special internal interfaces -provided by the PM QoS framework. User space can modify the former by opening -the :file:`cpu_dma_latency` special device file under :file:`/dev/` and writing -a binary value (interpreted as a signed 32-bit integer) to it. In turn, the -resume latency constraint for a CPU can be modified by user space by writing a -string (representing a signed 32-bit integer) to the -:file:`power/pm_qos_resume_latency_us` file under +global CPU latency limit and through the resume latency constraints for +individual CPUs. Kernel code (e.g. device drivers) can set both of them with +the help of special internal interfaces provided by the PM QoS framework. User +space can modify the former by opening the :file:`cpu_dma_latency` special +device file under :file:`/dev/` and writing a binary value (interpreted as a +signed 32-bit integer) to it. In turn, the resume latency constraint for a CPU +can be modified from user space by writing a string (representing a signed +32-bit integer) to the :file:`power/pm_qos_resume_latency_us` file under :file:`/sys/devices/system/cpu/cpu/` in ``sysfs``, where the CPU number ```` is allocated at the system initialization time. Negative values will be rejected in both cases and, also in both cases, the written integer @@ -605,32 +602,34 @@ number will be interpreted as a requested PM QoS constraint in microseconds. The requested value is not automatically applied as a new constraint, however, as it may be less restrictive (greater in this particular case) than another constraint previously requested by someone else. For this reason, the PM QoS -framework maintains a list of requests that have been made so far in each -global class and for each device, aggregates them and applies the effective -(minimum in this particular case) value as the new constraint. +framework maintains a list of requests that have been made so far for the +global CPU latency limit and for each individual CPU, aggregates them and +applies the effective (minimum in this particular case) value as the new +constraint. In fact, opening the :file:`cpu_dma_latency` special device file causes a new -PM QoS request to be created and added to the priority list of requests in the -``PM_QOS_CPU_DMA_LATENCY`` class and the file descriptor coming from the -"open" operation represents that request. If that file descriptor is then -used for writing, the number written to it will be associated with the PM QoS -request represented by it as a new requested constraint value. Next, the -priority list mechanism will be used to determine the new effective value of -the entire list of requests and that effective value will be set as a new -constraint. Thus setting a new requested constraint value will only change the -real constraint if the effective "list" value is affected by it. In particular, -for the ``PM_QOS_CPU_DMA_LATENCY`` class it only affects the real constraint if -it is the minimum of the requested constraints in the list. The process holding -a file descriptor obtained by opening the :file:`cpu_dma_latency` special device -file controls the PM QoS request associated with that file descriptor, but it -controls this particular PM QoS request only. +PM QoS request to be created and added to a global priority list of CPU latency +limit requests and the file descriptor coming from the "open" operation +represents that request. If that file descriptor is then used for writing, the +number written to it will be associated with the PM QoS request represented by +it as a new requested limit value. Next, the priority list mechanism will be +used to determine the new effective value of the entire list of requests and +that effective value will be set as a new CPU latency limit. Thus requesting a +new limit value will only change the real limit if the effective "list" value is +affected by it, which is the case if it is the minimum of the requested values +in the list. + +The process holding a file descriptor obtained by opening the +:file:`cpu_dma_latency` special device file controls the PM QoS request +associated with that file descriptor, but it controls this particular PM QoS +request only. Closing the :file:`cpu_dma_latency` special device file or, more precisely, the file descriptor obtained while opening it, causes the PM QoS request associated -with that file descriptor to be removed from the ``PM_QOS_CPU_DMA_LATENCY`` -class priority list and destroyed. If that happens, the priority list mechanism -will be used, again, to determine the new effective value for the whole list -and that value will become the new real constraint. +with that file descriptor to be removed from the global priority list of CPU +latency limit requests and destroyed. If that happens, the priority list +mechanism will be used again, to determine the new effective value for the whole +list and that value will become the new limit. In turn, for each CPU there is one resume latency PM QoS request associated with the :file:`power/pm_qos_resume_latency_us` file under @@ -647,10 +646,10 @@ CPU in question every time the list of requests is updated this way or another (there may be other requests coming from kernel code in that list). CPU idle time governors are expected to regard the minimum of the global -effective ``PM_QOS_CPU_DMA_LATENCY`` class constraint and the effective -resume latency constraint for the given CPU as the upper limit for the exit -latency of the idle states they can select for that CPU. They should never -select any idle states with exit latency beyond that limit. +(effective) CPU latency limit and the effective resume latency constraint for +the given CPU as the upper limit for the exit latency of the idle states that +they are allowed to select for that CPU. They should never select any idle +states with exit latency beyond that limit. Idle States Control Via Kernel Command Line diff --git a/Documentation/power/pm_qos_interface.rst b/Documentation/power/pm_qos_interface.rst index 0d62d506caf0..064f668fbdab 100644 --- a/Documentation/power/pm_qos_interface.rst +++ b/Documentation/power/pm_qos_interface.rst @@ -7,86 +7,78 @@ performance expectations by drivers, subsystems and user space applications on one of the parameters. Two different PM QoS frameworks are available: -1. PM QoS classes for cpu_dma_latency +1. CPU latency QoS. 2. The per-device PM QoS framework provides the API to manage the per-device latency constraints and PM QoS flags. -Each parameters have defined units: - - * latency: usec - * timeout: usec - * throughput: kbs (kilo bit / sec) - * memory bandwidth: mbs (mega bit / sec) +The latency unit used in the PM QoS framework is the microsecond (usec). 1. PM QoS framework =================== -The infrastructure exposes multiple misc device nodes one per implemented -parameter. The set of parameters implement is defined by pm_qos_power_init() -and pm_qos_params.h. This is done because having the available parameters -being runtime configurable or changeable from a driver was seen as too easy to -abuse. - -For each parameter a list of performance requests is maintained along with -an aggregated target value. The aggregated target value is updated with -changes to the request list or elements of the list. Typically the -aggregated target value is simply the max or min of the request values held -in the parameter list elements. +A global list of CPU latency QoS requests is maintained along with an aggregated +(effective) target value. The aggregated target value is updated with changes +to the request list or elements of the list. For CPU latency QoS, the +aggregated target value is simply the min of the request values held in the list +elements. + Note: the aggregated target value is implemented as an atomic variable so that reading the aggregated value does not require any locking mechanism. +From kernel space the use of this interface is simple: -From kernel mode the use of this interface is simple: - -void pm_qos_add_request(handle, param_class, target_value): - Will insert an element into the list for that identified PM QoS class with the - target value. Upon change to this list the new target is recomputed and any - registered notifiers are called only if the target value is now different. - Clients of pm_qos need to save the returned handle for future use in other - pm_qos API functions. +void cpu_latency_qos_add_request(handle, target_value): + Will insert an element into the CPU latency QoS list with the target value. + Upon change to this list the new target is recomputed and any registered + notifiers are called only if the target value is now different. + Clients of PM QoS need to save the returned handle for future use in other + PM QoS API functions. -void pm_qos_update_request(handle, new_target_value): +void cpu_latency_qos_update_request(handle, new_target_value): Will update the list element pointed to by the handle with the new target value and recompute the new aggregated target, calling the notification tree if the target is changed. -void pm_qos_remove_request(handle): +void cpu_latency_qos_remove_request(handle): Will remove the element. After removal it will update the aggregate target and call the notification tree if the target was changed as a result of removing the request. -int pm_qos_request(param_class): - Returns the aggregated value for a given PM QoS class. +int cpu_latency_qos_limit(): + Returns the aggregated value for the CPU latency QoS. + +int cpu_latency_qos_request_active(handle): + Returns if the request is still active, i.e. it has not been removed from the + CPU latency QoS list. -int pm_qos_request_active(handle): - Returns if the request is still active, i.e. it has not been removed from a - PM QoS class constraints list. +int cpu_latency_qos_add_notifier(notifier): + Adds a notification callback function to the CPU latency QoS. The callback is + called when the aggregated value for the CPU latency QoS is changed. -int pm_qos_add_notifier(param_class, notifier): - Adds a notification callback function to the PM QoS class. The callback is - called when the aggregated value for the PM QoS class is changed. +int cpu_latency_qos_remove_notifier(notifier): + Removes the notification callback function from the CPU latency QoS. -int pm_qos_remove_notifier(int param_class, notifier): - Removes the notification callback function for the PM QoS class. +From user space: -From user mode: +The infrastructure exposes one device node, /dev/cpu_dma_latency, for the CPU +latency QoS. -Only processes can register a pm_qos request. To provide for automatic +Only processes can register a PM QoS request. To provide for automatic cleanup of a process, the interface requires the process to register its -parameter requests in the following way: +parameter requests as follows. -To register the default pm_qos target for the specific parameter, the process -must open /dev/cpu_dma_latency +To register the default PM QoS target for the CPU latency QoS, the process must +open /dev/cpu_dma_latency. As long as the device node is held open that process has a registered request on the parameter. -To change the requested target value the process needs to write an s32 value to -the open device node. Alternatively the user mode program could write a hex -string for the value using 10 char long format e.g. "0x12345678". This -translates to a pm_qos_update_request call. +To change the requested target value, the process needs to write an s32 value to +the open device node. Alternatively, it can write a hex string for the value +using the 10 char long format e.g. "0x12345678". This translates to a +cpu_latency_qos_update_request() call. To remove the user mode request for a target value simply close the device node. -- cgit v1.2.3 From eed1015c4c4240780789556d8fbf71df68ce6508 Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Sun, 19 Jan 2020 18:43:17 +0530 Subject: dt-bindings: pinctrl: qcom: Add ipq6018 pinctrl bindings Add device tree binding Documentation details for ipq6018 pinctrl driver. Co-developed-by: Rajkumar Ayyasamy Signed-off-by: Rajkumar Ayyasamy Co-developed-by: Selvam Sathappan Periakaruppan Signed-off-by: Selvam Sathappan Periakaruppan Co-developed-by: Sivaprakash Murugesan Signed-off-by: Sivaprakash Murugesan Signed-off-by: Sricharan R Signed-off-by: Sivaprakash Murugesan Link: https://lore.kernel.org/r/1579439601-14810-2-git-send-email-sricharan@codeaurora.org Reviewed-by: Rob Herring Signed-off-by: Linus Walleij --- .../bindings/pinctrl/qcom,ipq6018-pinctrl.yaml | 153 +++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 Documentation/devicetree/bindings/pinctrl/qcom,ipq6018-pinctrl.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,ipq6018-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,ipq6018-pinctrl.yaml new file mode 100644 index 000000000000..63d1cfe86c6e --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/qcom,ipq6018-pinctrl.yaml @@ -0,0 +1,153 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/qcom,ipq6018-pinctrl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Technologies, Inc. IPQ6018 TLMM block + +maintainers: + - Sricharan R + +description: | + This binding describes the Top Level Mode Multiplexer block found in the + IPQ6018 platform. + +properties: + compatible: + const: qcom,ipq6018-pinctrl + + reg: + maxItems: 1 + + interrupts: + description: Specifies the TLMM summary IRQ + maxItems: 1 + + interrupt-controller: true + + '#interrupt-cells': + description: + Specifies the PIN numbers and Flags, as defined in defined in + include/dt-bindings/interrupt-controller/irq.h + const: 2 + + gpio-controller: true + + '#gpio-cells': + description: Specifying the pin number and flags, as defined in + include/dt-bindings/gpio/gpio.h + const: 2 + + gpio-ranges: + maxItems: 1 + +#PIN CONFIGURATION NODES +patternProperties: + '-pinmux$': + type: object + description: + Pinctrl node's client devices use subnodes for desired pin configuration. + Client device subnodes use below standard properties. + allOf: + - $ref: "/schemas/pinctrl/pincfg-node.yaml" + + properties: + pins: + description: + List of gpio pins affected by the properties specified in this + subnode. + items: + oneOf: + - pattern: "^gpio([1-9]|[1-7][0-9]|80)$" + - enum: [ sdc1_clk, sdc1_cmd, sdc1_data, sdc2_clk, sdc2_cmd, + sdc2_data, qdsd_cmd, qdsd_data0, qdsd_data1, qdsd_data2, + qdsd_data3 ] + minItems: 1 + maxItems: 4 + + function: + description: + Specify the alternative function to be configured for the specified + pins. + enum: [ adsp_ext, alsp_int, atest_bbrx0, atest_bbrx1, atest_char, + atest_char0, atest_char1, atest_char2, atest_char3, atest_combodac, + atest_gpsadc0, atest_gpsadc1, atest_tsens, atest_wlan0, + atest_wlan1, backlight_en, bimc_dte0, bimc_dte1, blsp1_i2c, + blsp2_i2c, blsp3_i2c, blsp4_i2c, blsp5_i2c, blsp6_i2c, blsp1_spi, + blsp1_spi_cs1, blsp1_spi_cs2, blsp1_spi_cs3, blsp2_spi, + blsp2_spi_cs1, blsp2_spi_cs2, blsp2_spi_cs3, blsp3_spi, + blsp3_spi_cs1, blsp3_spi_cs2, blsp3_spi_cs3, blsp4_spi, blsp5_spi, + blsp6_spi, blsp1_uart, blsp2_uart, blsp1_uim, blsp2_uim, cam1_rst, + cam1_standby, cam_mclk0, cam_mclk1, cci_async, cci_i2c, cci_timer0, + cci_timer1, cci_timer2, cdc_pdm0, codec_mad, dbg_out, display_5v, + dmic0_clk, dmic0_data, dsi_rst, ebi0_wrcdc, euro_us, ext_lpass, + flash_strobe, gcc_gp1_clk_a, gcc_gp1_clk_b, gcc_gp2_clk_a, + gcc_gp2_clk_b, gcc_gp3_clk_a, gcc_gp3_clk_b, gpio, gsm0_tx0, + gsm0_tx1, gsm1_tx0, gsm1_tx1, gyro_accl, kpsns0, kpsns1, kpsns2, + ldo_en, ldo_update, mag_int, mdp_vsync, modem_tsync, m_voc, + nav_pps, nav_tsync, pa_indicator, pbs0, pbs1, pbs2, pri_mi2s, + pri_mi2s_ws, prng_rosc, pwr_crypto_enabled_a, pwr_crypto_enabled_b, + pwr_modem_enabled_a, pwr_modem_enabled_b, pwr_nav_enabled_a, + pwr_nav_enabled_b, qdss_ctitrig_in_a0, qdss_ctitrig_in_a1, + qdss_ctitrig_in_b0, qdss_ctitrig_in_b1, qdss_ctitrig_out_a0, + qdss_ctitrig_out_a1, qdss_ctitrig_out_b0, qdss_ctitrig_out_b1, + qdss_traceclk_a, qdss_traceclk_b, qdss_tracectl_a, qdss_tracectl_b, + qdss_tracedata_a, qdss_tracedata_b, reset_n, sd_card, sd_write, + sec_mi2s, smb_int, ssbi_wtr0, ssbi_wtr1, uim1, uim2, uim3, + uim_batt, wcss_bt, wcss_fm, wcss_wlan, webcam1_rst ] + + drive-strength: + enum: [2, 4, 6, 8, 10, 12, 14, 16] + default: 2 + description: + Selects the drive strength for the specified pins, in mA. + + bias-pull-down: true + + bias-pull-up: true + + bias-disable: true + + output-high: true + + output-low: true + + required: + - pins + - function + + additionalProperties: false + +required: + - compatible + - reg + - interrupts + - interrupt-controller + - '#interrupt-cells' + - gpio-controller + - '#gpio-cells' + - gpio-ranges + +additionalProperties: false + +examples: + - | + #include + tlmm: pinctrl@1000000 { + compatible = "qcom,ipq6018-pinctrl"; + reg = <0x01000000 0x300000>; + interrupts = ; + interrupt-controller; + #interrupt-cells = <2>; + gpio-controller; + #gpio-cells = <2>; + gpio-ranges = <&tlmm 0 80>; + + serial3-pinmux { + pins = "gpio44", "gpio45"; + function = "blsp2_uart"; + drive-strength = <8>; + bias-pull-down; + }; + }; -- cgit v1.2.3 From a8cf1723c4b794f49d0906f9ddc9d1ac52af4ee2 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Wed, 15 Jan 2020 14:45:41 +0100 Subject: dt-bindings: iio: adc: stm32-adc: convert bindings to json-schema Convert the STM32 ADC binding to DT schema format using json-schema Signed-off-by: Fabrice Gasnier Reviewed-by: Rob Herring Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/adc/st,stm32-adc.txt | 149 ------- .../devicetree/bindings/iio/adc/st,stm32-adc.yaml | 458 +++++++++++++++++++++ 2 files changed, 458 insertions(+), 149 deletions(-) delete mode 100644 Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt create mode 100644 Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt deleted file mode 100644 index 8de933146771..000000000000 --- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.txt +++ /dev/null @@ -1,149 +0,0 @@ -STMicroelectronics STM32 ADC device driver - -STM32 ADC is a successive approximation analog-to-digital converter. -It has several multiplexed input channels. Conversions can be performed -in single, continuous, scan or discontinuous mode. Result of the ADC is -stored in a left-aligned or right-aligned 32-bit data register. -Conversions can be launched in software or using hardware triggers. - -The analog watchdog feature allows the application to detect if the input -voltage goes beyond the user-defined, higher or lower thresholds. - -Each STM32 ADC block can have up to 3 ADC instances. - -Each instance supports two contexts to manage conversions, each one has its -own configurable sequence and trigger: -- regular conversion can be done in sequence, running in background -- injected conversions have higher priority, and so have the ability to - interrupt regular conversion sequence (either triggered in SW or HW). - Regular sequence is resumed, in case it has been interrupted. - -Contents of a stm32 adc root node: ------------------------------------ -Required properties: -- compatible: Should be one of: - "st,stm32f4-adc-core" - "st,stm32h7-adc-core" - "st,stm32mp1-adc-core" -- reg: Offset and length of the ADC block register set. -- interrupts: One or more interrupts for ADC block. Some parts like stm32f4 - and stm32h7 share a common ADC interrupt line. stm32mp1 has two separate - interrupt lines, one for each ADC within ADC block. -- clocks: Core can use up to two clocks, depending on part used: - - "adc" clock: for the analog circuitry, common to all ADCs. - It's required on stm32f4. - It's optional on stm32h7. - - "bus" clock: for registers access, common to all ADCs. - It's not present on stm32f4. - It's required on stm32h7. -- clock-names: Must be "adc" and/or "bus" depending on part used. -- interrupt-controller: Identifies the controller node as interrupt-parent -- vdda-supply: Phandle to the vdda input analog voltage. -- vref-supply: Phandle to the vref input analog reference voltage. -- #interrupt-cells = <1>; -- #address-cells = <1>; -- #size-cells = <0>; - -Optional properties: -- A pinctrl state named "default" for each ADC channel may be defined to set - inX ADC pins in mode of operation for analog input on external pin. -- booster-supply: Phandle to the embedded booster regulator that can be used - to supply ADC analog input switches on stm32h7 and stm32mp1. -- vdd-supply: Phandle to the vdd input voltage. It can be used to supply ADC - analog input switches on stm32mp1. -- st,syscfg: Phandle to system configuration controller. It can be used to - control the analog circuitry on stm32mp1. -- st,max-clk-rate-hz: Allow to specify desired max clock rate used by analog - circuitry. - -Contents of a stm32 adc child node: ------------------------------------ -An ADC block node should contain at least one subnode, representing an -ADC instance available on the machine. - -Required properties: -- compatible: Should be one of: - "st,stm32f4-adc" - "st,stm32h7-adc" - "st,stm32mp1-adc" -- reg: Offset of ADC instance in ADC block (e.g. may be 0x0, 0x100, 0x200). -- clocks: Input clock private to this ADC instance. It's required only on - stm32f4, that has per instance clock input for registers access. -- interrupts: IRQ Line for the ADC (e.g. may be 0 for adc@0, 1 for adc@100 or - 2 for adc@200). -- st,adc-channels: List of single-ended channels muxed for this ADC. - It can have up to 16 channels on stm32f4 or 20 channels on stm32h7, numbered - from 0 to 15 or 19 (resp. for in0..in15 or in0..in19). -- st,adc-diff-channels: List of differential channels muxed for this ADC. - Depending on part used, some channels can be configured as differential - instead of single-ended (e.g. stm32h7). List here positive and negative - inputs pairs as , ,... vinp and vinn are numbered - from 0 to 19 on stm32h7) - Note: At least one of "st,adc-channels" or "st,adc-diff-channels" is required. - Both properties can be used together. Some channels can be used as - single-ended and some other ones as differential (mixed). But channels - can't be configured both as single-ended and differential (invalid). -- #io-channel-cells = <1>: See the IIO bindings section "IIO consumers" in - Documentation/devicetree/bindings/iio/iio-bindings.txt - -Optional properties: -- dmas: Phandle to dma channel for this ADC instance. - See ../../dma/dma.txt for details. -- dma-names: Must be "rx" when dmas property is being used. -- assigned-resolution-bits: Resolution (bits) to use for conversions. Must - match device available resolutions: - * can be 6, 8, 10 or 12 on stm32f4 - * can be 8, 10, 12, 14 or 16 on stm32h7 - Default is maximum resolution if unset. -- st,min-sample-time-nsecs: Minimum sampling time in nanoseconds. - Depending on hardware (board) e.g. high/low analog input source impedance, - fine tune of ADC sampling time may be recommended. - This can be either one value or an array that matches 'st,adc-channels' list, - to set sample time resp. for all channels, or independently for each channel. - -Example: - adc: adc@40012000 { - compatible = "st,stm32f4-adc-core"; - reg = <0x40012000 0x400>; - interrupts = <18>; - clocks = <&rcc 0 168>; - clock-names = "adc"; - vref-supply = <®_vref>; - interrupt-controller; - pinctrl-names = "default"; - pinctrl-0 = <&adc3_in8_pin>; - - #interrupt-cells = <1>; - #address-cells = <1>; - #size-cells = <0>; - - adc@0 { - compatible = "st,stm32f4-adc"; - #io-channel-cells = <1>; - reg = <0x0>; - clocks = <&rcc 0 168>; - interrupt-parent = <&adc>; - interrupts = <0>; - st,adc-channels = <8>; - dmas = <&dma2 0 0 0x400 0x0>; - dma-names = "rx"; - assigned-resolution-bits = <8>; - }; - ... - other adc child nodes follow... - }; - -Example to setup: -- channel 1 as single-ended -- channels 2 & 3 as differential (with resp. 6 & 7 negative inputs) - - adc: adc@40022000 { - compatible = "st,stm32h7-adc-core"; - ... - adc1: adc@0 { - compatible = "st,stm32h7-adc"; - ... - st,adc-channels = <1>; - st,adc-diff-channels = <2 6>, <3 7>; - }; - }; diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml new file mode 100644 index 000000000000..933ba37944d7 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml @@ -0,0 +1,458 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/bindings/iio/adc/st,stm32-adc.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: STMicroelectronics STM32 ADC bindings + +description: | + STM32 ADC is a successive approximation analog-to-digital converter. + It has several multiplexed input channels. Conversions can be performed + in single, continuous, scan or discontinuous mode. Result of the ADC is + stored in a left-aligned or right-aligned 32-bit data register. + Conversions can be launched in software or using hardware triggers. + + The analog watchdog feature allows the application to detect if the input + voltage goes beyond the user-defined, higher or lower thresholds. + + Each STM32 ADC block can have up to 3 ADC instances. + +maintainers: + - Fabrice Gasnier + +properties: + compatible: + enum: + - st,stm32f4-adc-core + - st,stm32h7-adc-core + - st,stm32mp1-adc-core + + reg: + maxItems: 1 + + interrupts: + description: | + One or more interrupts for ADC block, depending on part used: + - stm32f4 and stm32h7 share a common ADC interrupt line. + - stm32mp1 has two separate interrupt lines, one for each ADC within + ADC block. + minItems: 1 + maxItems: 2 + + clocks: + description: | + Core can use up to two clocks, depending on part used: + - "adc" clock: for the analog circuitry, common to all ADCs. + It's required on stm32f4. + It's optional on stm32h7 and stm32mp1. + - "bus" clock: for registers access, common to all ADCs. + It's not present on stm32f4. + It's required on stm32h7 and stm32mp1. + + clock-names: true + + st,max-clk-rate-hz: + description: + Allow to specify desired max clock rate used by analog circuitry. + + vdda-supply: + description: Phandle to the vdda input analog voltage. + + vref-supply: + description: Phandle to the vref input analog reference voltage. + + booster-supply: + description: + Phandle to the embedded booster regulator that can be used to supply ADC + analog input switches on stm32h7 and stm32mp1. + + vdd-supply: + description: + Phandle to the vdd input voltage. It can be used to supply ADC analog + input switches on stm32mp1. + + st,syscfg: + description: + Phandle to system configuration controller. It can be used to control the + analog circuitry on stm32mp1. + allOf: + - $ref: "/schemas/types.yaml#/definitions/phandle-array" + + interrupt-controller: true + + '#interrupt-cells': + const: 1 + + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + +allOf: + - if: + properties: + compatible: + contains: + const: st,stm32f4-adc-core + + then: + properties: + clocks: + maxItems: 1 + + clock-names: + const: adc + + interrupts: + items: + - description: interrupt line common for all ADCs + + st,max-clk-rate-hz: + minimum: 600000 + maximum: 36000000 + default: 36000000 + + booster-supply: false + + vdd-supply: false + + st,syscfg: false + + - if: + properties: + compatible: + contains: + const: st,stm32h7-adc-core + + then: + properties: + clocks: + minItems: 1 + maxItems: 2 + + clock-names: + items: + - const: bus + - const: adc + minItems: 1 + maxItems: 2 + + interrupts: + items: + - description: interrupt line common for all ADCs + + st,max-clk-rate-hz: + minimum: 120000 + maximum: 36000000 + default: 36000000 + + vdd-supply: false + + st,syscfg: false + + - if: + properties: + compatible: + contains: + const: st,stm32mp1-adc-core + + then: + properties: + clocks: + minItems: 1 + maxItems: 2 + + clock-names: + items: + - const: bus + - const: adc + minItems: 1 + maxItems: 2 + + interrupts: + items: + - description: interrupt line for ADC1 + - description: interrupt line for ADC2 + + st,max-clk-rate-hz: + minimum: 120000 + maximum: 36000000 + default: 36000000 + +additionalProperties: false + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - vdda-supply + - vref-supply + - interrupt-controller + - '#interrupt-cells' + - '#address-cells' + - '#size-cells' + +patternProperties: + "^adc@[0-9]+$": + type: object + description: + An ADC block node should contain at least one subnode, representing an + ADC instance available on the machine. + + properties: + compatible: + enum: + - st,stm32f4-adc + - st,stm32h7-adc + - st,stm32mp1-adc + + reg: + description: | + Offset of ADC instance in ADC block. Valid values are: + - 0x0: ADC1 + - 0x100: ADC2 + - 0x200: ADC3 (stm32f4 only) + maxItems: 1 + + '#io-channel-cells': + const: 1 + + interrupts: + description: | + IRQ Line for the ADC instance. Valid values are: + - 0 for adc@0 + - 1 for adc@100 + - 2 for adc@200 (stm32f4 only) + maxItems: 1 + + clocks: + description: + Input clock private to this ADC instance. It's required only on + stm32f4, that has per instance clock input for registers access. + maxItems: 1 + + dmas: + description: RX DMA Channel + maxItems: 1 + + dma-names: + const: rx + + assigned-resolution-bits: + description: | + Resolution (bits) to use for conversions: + - can be 6, 8, 10 or 12 on stm32f4 + - can be 8, 10, 12, 14 or 16 on stm32h7 and stm32mp1 + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + + st,adc-channels: + description: | + List of single-ended channels muxed for this ADC. It can have up to: + - 16 channels, numbered from 0 to 15 (for in0..in15) on stm32f4 + - 20 channels, numbered from 0 to 19 (for in0..in19) on stm32h7 and + stm32mp1. + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-array + + st,adc-diff-channels: + description: | + List of differential channels muxed for this ADC. Some channels can + be configured as differential instead of single-ended on stm32h7 and + on stm32mp1. Positive and negative inputs pairs are listed: + , ,... vinp and vinn are numbered from 0 to 19. + + Note: At least one of "st,adc-channels" or "st,adc-diff-channels" is + required. Both properties can be used together. Some channels can be + used as single-ended and some other ones as differential (mixed). But + channels can't be configured both as single-ended and differential. + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-matrix + - items: + items: + - description: | + "vinp" indicates positive input number + minimum: 0 + maximum: 19 + - description: | + "vinn" indicates negative input number + minimum: 0 + maximum: 19 + + st,min-sample-time-nsecs: + description: + Minimum sampling time in nanoseconds. Depending on hardware (board) + e.g. high/low analog input source impedance, fine tune of ADC + sampling time may be recommended. This can be either one value or an + array that matches "st,adc-channels" and/or "st,adc-diff-channels" + list, to set sample time resp. for all channels, or independently for + each channel. + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-array + + allOf: + - if: + properties: + compatible: + contains: + const: st,stm32f4-adc + + then: + properties: + reg: + enum: + - 0x0 + - 0x100 + - 0x200 + + interrupts: + minimum: 0 + maximum: 2 + + assigned-resolution-bits: + enum: [6, 8, 10, 12] + default: 12 + + st,adc-channels: + minItems: 1 + maxItems: 16 + items: + minimum: 0 + maximum: 15 + + st,adc-diff-channels: false + + st,min-sample-time-nsecs: + minItems: 1 + maxItems: 16 + items: + minimum: 80 + + required: + - clocks + + - if: + properties: + compatible: + contains: + enum: + - st,stm32h7-adc + - st,stm32mp1-adc + + then: + properties: + reg: + enum: + - 0x0 + - 0x100 + + interrupts: + minimum: 0 + maximum: 1 + + assigned-resolution-bits: + enum: [8, 10, 12, 14, 16] + default: 16 + + st,adc-channels: + minItems: 1 + maxItems: 20 + items: + minimum: 0 + maximum: 19 + + st,min-sample-time-nsecs: + minItems: 1 + maxItems: 20 + items: + minimum: 40 + + additionalProperties: false + + anyOf: + - required: + - st,adc-channels + - required: + - st,adc-diff-channels + + required: + - compatible + - reg + - interrupts + - '#io-channel-cells' + +examples: + - | + // Example 1: with stm32f429, ADC1, single-ended channel 8 + adc123: adc@40012000 { + compatible = "st,stm32f4-adc-core"; + reg = <0x40012000 0x400>; + interrupts = <18>; + clocks = <&rcc 0 168>; + clock-names = "adc"; + st,max-clk-rate-hz = <36000000>; + vdda-supply = <&vdda>; + vref-supply = <&vref>; + interrupt-controller; + #interrupt-cells = <1>; + #address-cells = <1>; + #size-cells = <0>; + adc@0 { + compatible = "st,stm32f4-adc"; + #io-channel-cells = <1>; + reg = <0x0>; + clocks = <&rcc 0 168>; + interrupt-parent = <&adc123>; + interrupts = <0>; + st,adc-channels = <8>; + dmas = <&dma2 0 0 0x400 0x0>; + dma-names = "rx"; + assigned-resolution-bits = <8>; + }; + // ... + // other adc child nodes follow... + }; + + - | + // Example 2: with stm32mp157c to setup ADC1 with: + // - channels 0 & 1 as single-ended + // - channels 2 & 3 as differential (with resp. 6 & 7 negative inputs) + #include + #include + adc12: adc@48003000 { + compatible = "st,stm32mp1-adc-core"; + reg = <0x48003000 0x400>; + interrupts = , + ; + clocks = <&rcc ADC12>, <&rcc ADC12_K>; + clock-names = "bus", "adc"; + booster-supply = <&booster>; + vdd-supply = <&vdd>; + vdda-supply = <&vdda>; + vref-supply = <&vref>; + st,syscfg = <&syscfg>; + interrupt-controller; + #interrupt-cells = <1>; + #address-cells = <1>; + #size-cells = <0>; + adc@0 { + compatible = "st,stm32mp1-adc"; + #io-channel-cells = <1>; + reg = <0x0>; + interrupt-parent = <&adc12>; + interrupts = <0>; + st,adc-channels = <0 1>; + st,adc-diff-channels = <2 6>, <3 7>; + st,min-sample-time-nsecs = <5000>; + dmas = <&dmamux1 9 0x400 0x05>; + dma-names = "rx"; + }; + // ... + // other adc child node follow... + }; + +... -- cgit v1.2.3 From a2ff6e0347a9f1cbf930a1b121027c950c68c4ea Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 26 Jan 2020 16:05:47 +0100 Subject: iio: light: Add DT bindings for GP2AP002 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds device tree bindings for the GP2AP002 light and proximity sensor. As with other early proximity sensors (~2010) the light sensor and proximity sensors were combined into a single component. Cc: Stephan Gerhold Cc: Minkyu Kang Cc: Paweł Chmiel Cc: Jonathan Bakker Cc: Oskar Andero Cc: Dmitry Torokhov Cc: devicetree@vger.kernel.org Reviewed-by: Rob Herring Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- .../bindings/iio/light/sharp,gp2ap002.yaml | 85 ++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 Documentation/devicetree/bindings/iio/light/sharp,gp2ap002.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/iio/light/sharp,gp2ap002.yaml b/Documentation/devicetree/bindings/iio/light/sharp,gp2ap002.yaml new file mode 100644 index 000000000000..12aa16f24772 --- /dev/null +++ b/Documentation/devicetree/bindings/iio/light/sharp,gp2ap002.yaml @@ -0,0 +1,85 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iio/light/sharp,gp2ap002.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sharp GP2AP002A00F and GP2AP002S00F proximity and ambient light sensors + +maintainers: + - Linus Walleij + +description: | + Proximity and ambient light sensor with IR LED for the proximity + sensing and an analog output for light intensity. The ambient light + sensor output is not available on the GP2AP002S00F variant. + +properties: + compatible: + enum: + - sharp,gp2ap002a00f + - sharp,gp2ap002s00f + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + description: an interrupt for proximity, usually a GPIO line + + vdd-supply: + description: VDD power supply a phandle to a regulator + + vio-supply: + description: VIO power supply a phandle to a regulator + + io-channels: + maxItems: 1 + description: ALSOUT ADC channel to read the ambient light + + io-channel-names: + const: alsout + + sharp,proximity-far-hysteresis: + $ref: /schemas/types.yaml#/definitions/uint8 + description: | + Hysteresis setting for "far" object detection, this setting is + device-unique and adjust the optical setting for proximity detection + of a "far away" object in front of the sensor. + + sharp,proximity-close-hysteresis: + $ref: /schemas/types.yaml#/definitions/uint8 + description: | + Hysteresis setting for "close" object detection, this setting is + device-unique and adjust the optical setting for proximity detection + of a "close" object in front of the sensor. + +required: + - compatible + - reg + - interrupts + - sharp,proximity-far-hysteresis + - sharp,proximity-close-hysteresis + +examples: + - | + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + light-sensor@44 { + compatible = "sharp,gp2ap002a00f"; + reg = <0x44>; + interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + vdd-supply = <&vdd_regulator>; + vio-supply = <&vio_regulator>; + io-channels = <&adc_channel>; + io-channel-names = "alsout"; + sharp,proximity-far-hysteresis = /bits/ 8 <0x2f>; + sharp,proximity-close-hysteresis = /bits/ 8 <0x0f>; + }; + }; + +... -- cgit v1.2.3 From 66053d5d1b74be17b133775a4373888ba227e902 Mon Sep 17 00:00:00 2001 From: Andreas Klinger Date: Mon, 20 Jan 2020 17:44:55 +0100 Subject: dt-bindings: devantech-srf04.yaml: add pm feature Add GPIO line and startup time for usage of power management Reviewed-by: Rob Herring Signed-off-by: Andreas Klinger Signed-off-by: Jonathan Cameron --- .../bindings/iio/proximity/devantech-srf04.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/iio/proximity/devantech-srf04.yaml b/Documentation/devicetree/bindings/iio/proximity/devantech-srf04.yaml index 4e80ea7c1475..8afbac24c34e 100644 --- a/Documentation/devicetree/bindings/iio/proximity/devantech-srf04.yaml +++ b/Documentation/devicetree/bindings/iio/proximity/devantech-srf04.yaml @@ -51,6 +51,24 @@ properties: the time between two interrupts is measured in the driver. maxItems: 1 + power-gpios: + description: + Definition of the GPIO for power management of connected peripheral + (output). + This GPIO can be used by the external hardware for power management. + When the device gets suspended it's switched off and when it resumes + it's switched on again. After some period of inactivity the driver + get suspended automatically (autosuspend feature). + maxItems: 1 + + startup-time-ms: + description: + This is the startup time the device needs after a resume to be up and + running. + minimum: 0 + maximum: 1000 + default: 100 + required: - compatible - trig-gpios -- cgit v1.2.3 From fc976f50ae77c6a841803d816b057417b9b14293 Mon Sep 17 00:00:00 2001 From: Bastian Germann Date: Fri, 14 Feb 2020 12:10:02 +0100 Subject: dt-bindings: arm: sunxi: Add Linutronix Testbox Add device tree bindings for the newly added Linutronix Testbox board. Signed-off-by: Bastian Germann Signed-off-by: Maxime Ripard --- Documentation/devicetree/bindings/arm/sunxi.yaml | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml index 159060b65c5d..1e63c9867749 100644 --- a/Documentation/devicetree/bindings/arm/sunxi.yaml +++ b/Documentation/devicetree/bindings/arm/sunxi.yaml @@ -394,6 +394,12 @@ properties: - const: linksprite,pcduino3-nano - const: allwinner,sun7i-a20 + - description: Linutronix Testbox v2 + items: + - const: linutronix,testbox-v2 + - const: lamobo,lamobo-r1 + - const: allwinner,sun7i-a20 + - description: HAOYU Electronics Marsboard A10 items: - const: haoyu,a10-marsboard -- cgit v1.2.3 From b7dec2cb19f3114c9e34431bb52ebe88db9b6363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 30 Jan 2020 14:15:47 +0100 Subject: dt-bindings: iio: ltc2632: expand for ltc2636 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ltc2636 family of devices is register compatible with the ltc2636 chips, it just features 8 instead of 2 channels. Signed-off-by: Uwe Kleine-König Acked-by: Rob Herring Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/dac/ltc2632.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/iio/dac/ltc2632.txt b/Documentation/devicetree/bindings/iio/dac/ltc2632.txt index e0d5fea33031..338c3220f01a 100644 --- a/Documentation/devicetree/bindings/iio/dac/ltc2632.txt +++ b/Documentation/devicetree/bindings/iio/dac/ltc2632.txt @@ -1,4 +1,4 @@ -Linear Technology LTC2632 DAC device driver +Linear Technology LTC2632/2636 DAC Required properties: - compatible: Has to contain one of the following: @@ -8,6 +8,12 @@ Required properties: lltc,ltc2632-h12 lltc,ltc2632-h10 lltc,ltc2632-h8 + lltc,ltc2636-l12 + lltc,ltc2636-l10 + lltc,ltc2636-l8 + lltc,ltc2636-h12 + lltc,ltc2636-h10 + lltc,ltc2636-h8 Property rules described in Documentation/devicetree/bindings/spi/spi-bus.txt apply. In particular, "reg" and "spi-max-frequency" properties must be given. -- cgit v1.2.3 From 816abe42b8bde912c58d862d540a2f57ba967477 Mon Sep 17 00:00:00 2001 From: Tomer Maimon Date: Mon, 3 Feb 2020 17:09:16 +0200 Subject: dt-binding: iio: add NPCM ADC reset support Add NPCM ADC reset binding documentation. Signed-off-by: Tomer Maimon Acked-by: Rob Herring Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/adc/nuvoton,npcm-adc.txt | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/iio/adc/nuvoton,npcm-adc.txt b/Documentation/devicetree/bindings/iio/adc/nuvoton,npcm-adc.txt index eb939fe77836..ef8eeec1a997 100644 --- a/Documentation/devicetree/bindings/iio/adc/nuvoton,npcm-adc.txt +++ b/Documentation/devicetree/bindings/iio/adc/nuvoton,npcm-adc.txt @@ -6,6 +6,7 @@ Required properties: - compatible: "nuvoton,npcm750-adc" for the NPCM7XX BMC. - reg: specifies physical base address and size of the registers. - interrupts: Contain the ADC interrupt with flags for falling edge. +- resets : phandle to the reset control for this device. Optional properties: - clocks: phandle of ADC reference clock, in case the clock is not @@ -21,4 +22,5 @@ adc: adc@f000c000 { reg = <0xf000c000 0x8>; interrupts = ; clocks = <&clk NPCM7XX_CLK_ADC>; + resets = <&rstc NPCM7XX_RESET_IPSRST1 NPCM7XX_RESET_ADC>; }; -- cgit v1.2.3 From b9cd7be04b0e6cb844f4c64ce05ba892095eb521 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Maneyrol Date: Thu, 6 Feb 2020 11:31:05 +0100 Subject: dt-bindings: add description for new supported chips Add missing mpu6000 and new icm20609, icm20689, icm20690, and iam20680. Signed-off-by: Jean-Baptiste Maneyrol Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt b/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt index c5ee8a20af9f..f2f64749e818 100644 --- a/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt +++ b/Documentation/devicetree/bindings/iio/imu/inv_mpu6050.txt @@ -4,6 +4,7 @@ http://www.invensense.com/mems/gyro/mpu6050.html Required properties: - compatible : should be one of + "invensense,mpu6000" "invensense,mpu6050" "invensense,mpu6500" "invensense,mpu6515" @@ -11,7 +12,11 @@ Required properties: "invensense,mpu9250" "invensense,mpu9255" "invensense,icm20608" + "invensense,icm20609" + "invensense,icm20689" "invensense,icm20602" + "invensense,icm20690" + "invensense,iam20680" - reg : the I2C address of the sensor - interrupts: interrupt mapping for IRQ. It should be configured with flags IRQ_TYPE_LEVEL_HIGH, IRQ_TYPE_EDGE_RISING, IRQ_TYPE_LEVEL_LOW or -- cgit v1.2.3 From 165b5fb294e878f00015b7beb91cb00e36e4f8b8 Mon Sep 17 00:00:00 2001 From: Jianxin Pan Date: Wed, 15 Jan 2020 19:30:29 +0800 Subject: dt-bindings: power: add Amlogic secure power domains bindings Add the bindings for the Amlogic Secure power domains, controlling the secure power domains. The bindings targets the Amlogic A1 and C1 compatible SoCs, in which the power domain registers are in secure world. Signed-off-by: Jianxin Pan Signed-off-by: Kevin Hilman Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/1579087831-94965-3-git-send-email-jianxin.pan@amlogic.com --- .../bindings/power/amlogic,meson-sec-pwrc.yaml | 40 ++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml b/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml new file mode 100644 index 000000000000..af32209218bb --- /dev/null +++ b/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: (GPL-2.0+ OR MIT) +# Copyright (c) 2019 Amlogic, Inc +# Author: Jianxin Pan +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/power/amlogic,meson-sec-pwrc.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Amlogic Meson Secure Power Domains + +maintainers: + - Jianxin Pan + +description: |+ + Secure Power Domains used in Meson A1/C1 SoCs, and should be the child node + of secure-monitor. + +properties: + compatible: + enum: + - amlogic,meson-a1-pwrc + + "#power-domain-cells": + const: 1 + +required: + - compatible + - "#power-domain-cells" + +examples: + - | + secure-monitor { + compatible = "amlogic,meson-gxbb-sm"; + + pwrc: power-controller { + compatible = "amlogic,meson-a1-pwrc"; + #power-domain-cells = <1>; + }; + } + -- cgit v1.2.3 From 3b375426d4a0077ae10e48039912a2e3cecd7888 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 11 Feb 2020 20:48:27 +0800 Subject: dt-bindings: arm: imx: Add the i.MX8MP EVK board Add board binding for i.MX8MP EVK board. Signed-off-by: Anson Huang Reviewed-by: Rob Herring Signed-off-by: Shawn Guo --- Documentation/devicetree/bindings/arm/fsl.yaml | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml index 49ab2d83b744..e654a6376bc4 100644 --- a/Documentation/devicetree/bindings/arm/fsl.yaml +++ b/Documentation/devicetree/bindings/arm/fsl.yaml @@ -324,6 +324,12 @@ properties: - fsl,imx8mn-evk # i.MX8MN LPDDR4 EVK Board - const: fsl,imx8mn + - description: i.MX8MP based Boards + items: + - enum: + - fsl,imx8mp-evk # i.MX8MP EVK Board + - const: fsl,imx8mp + - description: i.MX8MQ based Boards items: - enum: -- cgit v1.2.3 From 39faeba7077ef87e1d0d170eda2cfb7b3b79f726 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 13 Jan 2020 23:24:11 -0800 Subject: dt-bindings: tegra: Convert Tegra PMC bindings to YAML This patch converts text based Tegra PMC bindings document to YAML schema for performing dt validation. Tested-by: Dmitry Osipenko Reviewed-by: Dmitry Osipenko Reviewed-by: Rob Herring Signed-off-by: Sowjanya Komatineni Signed-off-by: Thierry Reding --- .../bindings/arm/tegra/nvidia,tegra20-pmc.txt | 300 ------------------ .../bindings/arm/tegra/nvidia,tegra20-pmc.yaml | 340 +++++++++++++++++++++ 2 files changed, 340 insertions(+), 300 deletions(-) delete mode 100644 Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt create mode 100644 Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt deleted file mode 100644 index cb12f33a247f..000000000000 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt +++ /dev/null @@ -1,300 +0,0 @@ -NVIDIA Tegra Power Management Controller (PMC) - -== Power Management Controller Node == - -The PMC block interacts with an external Power Management Unit. The PMC -mostly controls the entry and exit of the system from different sleep -modes. It provides power-gating controllers for SoC and CPU power-islands. - -Required properties: -- name : Should be pmc -- compatible : Should contain one of the following: - For Tegra20 must contain "nvidia,tegra20-pmc". - For Tegra30 must contain "nvidia,tegra30-pmc". - For Tegra114 must contain "nvidia,tegra114-pmc" - For Tegra124 must contain "nvidia,tegra124-pmc" - For Tegra132 must contain "nvidia,tegra124-pmc" - For Tegra210 must contain "nvidia,tegra210-pmc" -- reg : Offset and length of the register set for the device -- clocks : Must contain an entry for each entry in clock-names. - See ../clocks/clock-bindings.txt for details. -- clock-names : Must include the following entries: - "pclk" (The Tegra clock of that name), - "clk32k_in" (The 32KHz clock input to Tegra). - -Optional properties: -- nvidia,invert-interrupt : If present, inverts the PMU interrupt signal. - The PMU is an external Power Management Unit, whose interrupt output - signal is fed into the PMC. This signal is optionally inverted, and then - fed into the ARM GIC. The PMC is not involved in the detection or - handling of this interrupt signal, merely its inversion. -- nvidia,suspend-mode : The suspend mode that the platform should use. - Valid values are 0, 1 and 2: - 0 (LP0): CPU + Core voltage off and DRAM in self-refresh - 1 (LP1): CPU voltage off and DRAM in self-refresh - 2 (LP2): CPU voltage off -- nvidia,core-power-req-active-high : Boolean, core power request active-high -- nvidia,sys-clock-req-active-high : Boolean, system clock request active-high -- nvidia,combined-power-req : Boolean, combined power request for CPU & Core -- nvidia,cpu-pwr-good-en : Boolean, CPU power good signal (from PMIC to PMC) - is enabled. - -Required properties when nvidia,suspend-mode is specified: -- nvidia,cpu-pwr-good-time : CPU power good time in uS. -- nvidia,cpu-pwr-off-time : CPU power off time in uS. -- nvidia,core-pwr-good-time : - Core power good time in uS. -- nvidia,core-pwr-off-time : Core power off time in uS. - -Required properties when nvidia,suspend-mode=<0>: -- nvidia,lp0-vec : Starting address and length of LP0 vector - The LP0 vector contains the warm boot code that is executed by AVP when - resuming from the LP0 state. The AVP (Audio-Video Processor) is an ARM7 - processor and always being the first boot processor when chip is power on - or resume from deep sleep mode. When the system is resumed from the deep - sleep mode, the warm boot code will restore some PLLs, clocks and then - bring up CPU0 for resuming the system. - -Hardware-triggered thermal reset: -On Tegra30, Tegra114 and Tegra124, if the 'i2c-thermtrip' subnode exists, -hardware-triggered thermal reset will be enabled. - -Required properties for hardware-triggered thermal reset (inside 'i2c-thermtrip'): -- nvidia,i2c-controller-id : ID of I2C controller to send poweroff command to. Valid values are - described in section 9.2.148 "APBDEV_PMC_SCRATCH53_0" of the - Tegra K1 Technical Reference Manual. -- nvidia,bus-addr : Bus address of the PMU on the I2C bus -- nvidia,reg-addr : I2C register address to write poweroff command to -- nvidia,reg-data : Poweroff command to write to PMU - -Optional properties for hardware-triggered thermal reset (inside 'i2c-thermtrip'): -- nvidia,pinmux-id : Pinmux used by the hardware when issuing poweroff command. - Defaults to 0. Valid values are described in section 12.5.2 - "Pinmux Support" of the Tegra4 Technical Reference Manual. - -Optional nodes: -- powergates : This node contains a hierarchy of power domain nodes, which - should match the powergates on the Tegra SoC. See "Powergate - Nodes" below. - -Example: - -/ SoC dts including file -pmc@7000f400 { - compatible = "nvidia,tegra20-pmc"; - reg = <0x7000e400 0x400>; - clocks = <&tegra_car 110>, <&clk32k_in>; - clock-names = "pclk", "clk32k_in"; - nvidia,invert-interrupt; - nvidia,suspend-mode = <1>; - nvidia,cpu-pwr-good-time = <2000>; - nvidia,cpu-pwr-off-time = <100>; - nvidia,core-pwr-good-time = <3845 3845>; - nvidia,core-pwr-off-time = <458>; - nvidia,core-power-req-active-high; - nvidia,sys-clock-req-active-high; - nvidia,lp0-vec = <0xbdffd000 0x2000>; -}; - -/ Tegra board dts file -{ - ... - pmc@7000f400 { - i2c-thermtrip { - nvidia,i2c-controller-id = <4>; - nvidia,bus-addr = <0x40>; - nvidia,reg-addr = <0x36>; - nvidia,reg-data = <0x2>; - }; - }; - ... - clocks { - compatible = "simple-bus"; - #address-cells = <1>; - #size-cells = <0>; - - clk32k_in: clock { - compatible = "fixed-clock"; - reg=<0>; - #clock-cells = <0>; - clock-frequency = <32768>; - }; - }; - ... -}; - - -== Powergate Nodes == - -Each of the powergate nodes represents a power-domain on the Tegra SoC -that can be power-gated by the Tegra PMC. The name of the powergate node -should be one of the below. Note that not every powergate is applicable -to all Tegra devices and the following list shows which powergates are -applicable to which devices. Please refer to the Tegra TRM for more -details on the various powergates. - - Name Description Devices Applicable - 3d 3D Graphics Tegra20/114/124/210 - 3d0 3D Graphics 0 Tegra30 - 3d1 3D Graphics 1 Tegra30 - aud Audio Tegra210 - dfd Debug Tegra210 - dis Display A Tegra114/124/210 - disb Display B Tegra114/124/210 - heg 2D Graphics Tegra30/114/124/210 - iram Internal RAM Tegra124/210 - mpe MPEG Encode All - nvdec NVIDIA Video Decode Engine Tegra210 - nvjpg NVIDIA JPEG Engine Tegra210 - pcie PCIE Tegra20/30/124/210 - sata SATA Tegra30/124/210 - sor Display interfaces Tegra124/210 - ve2 Video Encode Engine 2 Tegra210 - venc Video Encode Engine All - vdec Video Decode Engine Tegra20/30/114/124 - vic Video Imaging Compositor Tegra124/210 - xusba USB Partition A Tegra114/124/210 - xusbb USB Partition B Tegra114/124/210 - xusbc USB Partition C Tegra114/124/210 - -Required properties: - - clocks: Must contain an entry for each clock required by the PMC for - controlling a power-gate. See ../clocks/clock-bindings.txt for details. - - resets: Must contain an entry for each reset required by the PMC for - controlling a power-gate. See ../reset/reset.txt for details. - - #power-domain-cells: Must be 0. - -Example: - - pmc: pmc@7000e400 { - compatible = "nvidia,tegra210-pmc"; - reg = <0x0 0x7000e400 0x0 0x400>; - clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>; - clock-names = "pclk", "clk32k_in"; - - powergates { - pd_audio: aud { - clocks = <&tegra_car TEGRA210_CLK_APE>, - <&tegra_car TEGRA210_CLK_APB2APE>; - resets = <&tegra_car 198>; - #power-domain-cells = <0>; - }; - }; - }; - - -== Powergate Clients == - -Hardware blocks belonging to a power domain should contain a "power-domains" -property that is a phandle pointing to the corresponding powergate node. - -Example: - - adma: adma@702e2000 { - ... - power-domains = <&pd_audio>; - ... - }; - -== Pad Control == - -On Tegra SoCs a pad is a set of pins which are configured as a group. -The pin grouping is a fixed attribute of the hardware. The PMC can be -used to set pad power state and signaling voltage. A pad can be either -in active or power down mode. The support for power state and signaling -voltage configuration varies depending on the pad in question. 3.3 V and -1.8 V signaling voltages are supported on pins where software -controllable signaling voltage switching is available. - -The pad configuration state nodes are placed under the pmc node and they -are referred to by the pinctrl client properties. For more information -see Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt. -The pad name should be used as the value of the pins property in pin -configuration nodes. - -The following pads are present on Tegra124 and Tegra132: -audio bb cam comp -csia csb cse dsi -dsib dsic dsid hdmi -hsic hv lvds mipi-bias -nand pex-bias pex-clk1 pex-clk2 -pex-cntrl sdmmc1 sdmmc3 sdmmc4 -sys_ddc uart usb0 usb1 -usb2 usb_bias - -The following pads are present on Tegra210: -audio audio-hv cam csia -csib csic csid csie -csif dbg debug-nonao dmic -dp dsi dsib dsic -dsid emmc emmc2 gpio -hdmi hsic lvds mipi-bias -pex-bias pex-clk1 pex-clk2 pex-cntrl -sdmmc1 sdmmc3 spi spi-hv -uart usb0 usb1 usb2 -usb3 usb-bias - -Required pin configuration properties: - - pins: Must contain name of the pad(s) to be configured. - -Optional pin configuration properties: - - low-power-enable: Configure the pad into power down mode - - low-power-disable: Configure the pad into active mode - - power-source: Must contain either TEGRA_IO_PAD_VOLTAGE_1V8 - or TEGRA_IO_PAD_VOLTAGE_3V3 to select between signaling voltages. - The values are defined in - include/dt-bindings/pinctrl/pinctrl-tegra-io-pad.h. - -Note: The power state can be configured on all of the Tegra124 and - Tegra132 pads. None of the Tegra124 or Tegra132 pads support - signaling voltage switching. - -Note: All of the listed Tegra210 pads except pex-cntrl support power - state configuration. Signaling voltage switching is supported on - following Tegra210 pads: audio, audio-hv, cam, dbg, dmic, gpio, - pex-cntrl, sdmmc1, sdmmc3, spi, spi-hv, and uart. - -Pad configuration state example: - pmc: pmc@7000e400 { - compatible = "nvidia,tegra210-pmc"; - reg = <0x0 0x7000e400 0x0 0x400>; - clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>; - clock-names = "pclk", "clk32k_in"; - - ... - - sdmmc1_3v3: sdmmc1-3v3 { - pins = "sdmmc1"; - power-source = ; - }; - - sdmmc1_1v8: sdmmc1-1v8 { - pins = "sdmmc1"; - power-source = ; - }; - - hdmi_off: hdmi-off { - pins = "hdmi"; - low-power-enable; - } - - hdmi_on: hdmi-on { - pins = "hdmi"; - low-power-disable; - } - }; - -Pinctrl client example: - sdmmc1: sdhci@700b0000 { - ... - pinctrl-names = "sdmmc-3v3", "sdmmc-1v8"; - pinctrl-0 = <&sdmmc1_3v3>; - pinctrl-1 = <&sdmmc1_1v8>; - }; - ... - sor@54540000 { - ... - pinctrl-0 = <&hdmi_off>; - pinctrl-1 = <&hdmi_on>; - pinctrl-names = "hdmi-on", "hdmi-off"; - }; diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml new file mode 100644 index 000000000000..3ff34b348141 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml @@ -0,0 +1,340 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/tegra/nvidia,tegra20-pmc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Tegra Power Management Controller (PMC) + +maintainers: + - Thierry Reding + - Jonathan Hunter + +properties: + compatible: + enum: + - nvidia,tegra20-pmc + - nvidia,tegra20-pmc + - nvidia,tegra30-pmc + - nvidia,tegra114-pmc + - nvidia,tegra124-pmc + - nvidia,tegra210-pmc + + reg: + maxItems: 1 + description: + Offset and length of the register set for the device. + + clock-names: + items: + - const: pclk + - const: clk32k_in + description: + Must includes entries pclk and clk32k_in. + pclk is the Tegra clock of that name and clk32k_in is 32KHz clock + input to Tegra. + + clocks: + maxItems: 2 + description: + Must contain an entry for each entry in clock-names. + See ../clocks/clocks-bindings.txt for details. + + '#interrupt-cells': + const: 2 + description: + Specifies number of cells needed to encode an interrupt source. + The value must be 2. + + interrupt-controller: true + + nvidia,invert-interrupt: + $ref: /schemas/types.yaml#/definitions/flag + description: Inverts the PMU interrupt signal. + The PMU is an external Power Management Unit, whose interrupt output + signal is fed into the PMC. This signal is optionally inverted, and + then fed into the ARM GIC. The PMC is not involved in the detection + or handling of this interrupt signal, merely its inversion. + + nvidia,core-power-req-active-high: + $ref: /schemas/types.yaml#/definitions/flag + description: Core power request active-high. + + nvidia,sys-clock-req-active-high: + $ref: /schemas/types.yaml#/definitions/flag + description: System clock request active-high. + + nvidia,combined-power-req: + $ref: /schemas/types.yaml#/definitions/flag + description: combined power request for CPU and Core. + + nvidia,cpu-pwr-good-en: + $ref: /schemas/types.yaml#/definitions/flag + description: + CPU power good signal from external PMIC to PMC is enabled. + + nvidia,suspend-mode: + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - enum: [0, 1, 2] + description: + The suspend mode that the platform should use. + Mode 0 is for LP0, CPU + Core voltage off and DRAM in self-refresh + Mode 1 is for LP1, CPU voltage off and DRAM in self-refresh + Mode 2 is for LP2, CPU voltage off + + nvidia,cpu-pwr-good-time: + $ref: /schemas/types.yaml#/definitions/uint32 + description: CPU power good time in uSec. + + nvidia,cpu-pwr-off-time: + $ref: /schemas/types.yaml#/definitions/uint32 + description: CPU power off time in uSec. + + nvidia,core-pwr-good-time: + $ref: /schemas/types.yaml#/definitions/uint32-array + description: + + Core power good time in uSec. + + nvidia,core-pwr-off-time: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Core power off time in uSec. + + nvidia,lp0-vec: + $ref: /schemas/types.yaml#/definitions/uint32-array + description: + Starting address and length of LP0 vector. + The LP0 vector contains the warm boot code that is executed + by AVP when resuming from the LP0 state. + The AVP (Audio-Video Processor) is an ARM7 processor and + always being the first boot processor when chip is power on + or resume from deep sleep mode. When the system is resumed + from the deep sleep mode, the warm boot code will restore + some PLLs, clocks and then brings up CPU0 for resuming the + system. + + i2c-thermtrip: + type: object + description: + On Tegra30, Tegra114 and Tegra124 if i2c-thermtrip subnode exists, + hardware-triggered thermal reset will be enabled. + + properties: + nvidia,i2c-controller-id: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + ID of I2C controller to send poweroff command to PMU. + Valid values are described in section 9.2.148 + "APBDEV_PMC_SCRATCH53_0" of the Tegra K1 Technical Reference + Manual. + + nvidia,bus-addr: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Bus address of the PMU on the I2C bus. + + nvidia,reg-addr: + $ref: /schemas/types.yaml#/definitions/uint32 + description: PMU I2C register address to issue poweroff command. + + nvidia,reg-data: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Poweroff command to write to PMU. + + nvidia,pinmux-id: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Pinmux used by the hardware when issuing Poweroff command. + Defaults to 0. Valid values are described in section 12.5.2 + "Pinmux Support" of the Tegra4 Technical Reference Manual. + + required: + - nvidia,i2c-controller-id + - nvidia,bus-addr + - nvidia,reg-addr + - nvidia,reg-data + + additionalProperties: false + + powergates: + type: object + description: | + This node contains a hierarchy of power domain nodes, which should + match the powergates on the Tegra SoC. Each powergate node + represents a power-domain on the Tegra SoC that can be power-gated + by the Tegra PMC. + Hardware blocks belonging to a power domain should contain + "power-domains" property that is a phandle pointing to corresponding + powergate node. + The name of the powergate node should be one of the below. Note that + not every powergate is applicable to all Tegra devices and the following + list shows which powergates are applicable to which devices. + Please refer to Tegra TRM for mode details on the powergate nodes to + use for each power-gate block inside Tegra. + Name Description Devices Applicable + 3d 3D Graphics Tegra20/114/124/210 + 3d0 3D Graphics 0 Tegra30 + 3d1 3D Graphics 1 Tegra30 + aud Audio Tegra210 + dfd Debug Tegra210 + dis Display A Tegra114/124/210 + disb Display B Tegra114/124/210 + heg 2D Graphics Tegra30/114/124/210 + iram Internal RAM Tegra124/210 + mpe MPEG Encode All + nvdec NVIDIA Video Decode Engine Tegra210 + nvjpg NVIDIA JPEG Engine Tegra210 + pcie PCIE Tegra20/30/124/210 + sata SATA Tegra30/124/210 + sor Display interfaces Tegra124/210 + ve2 Video Encode Engine 2 Tegra210 + venc Video Encode Engine All + vdec Video Decode Engine Tegra20/30/114/124 + vic Video Imaging Compositor Tegra124/210 + xusba USB Partition A Tegra114/124/210 + xusbb USB Partition B Tegra114/124/210 + xusbc USB Partition C Tegra114/124/210 + + patternProperties: + "^[a-z0-9]+$": + type: object + + patternProperties: + clocks: + minItems: 1 + maxItems: 8 + description: + Must contain an entry for each clock required by the PMC + for controlling a power-gate. + See ../clocks/clock-bindings.txt document for more details. + + resets: + minItems: 1 + maxItems: 8 + description: + Must contain an entry for each reset required by the PMC + for controlling a power-gate. + See ../reset/reset.txt for more details. + + '#power-domain-cells': + const: 0 + description: Must be 0. + + required: + - clocks + - resets + - '#power-domain-cells' + + additionalProperties: false + +patternProperties: + "^[a-f0-9]+-[a-f0-9]+$": + type: object + description: + This is a Pad configuration node. On Tegra SOCs a pad is a set of + pins which are configured as a group. The pin grouping is a fixed + attribute of the hardware. The PMC can be used to set pad power state + and signaling voltage. A pad can be either in active or power down mode. + The support for power state and signaling voltage configuration varies + depending on the pad in question. 3.3V and 1.8V signaling voltages + are supported on pins where software controllable signaling voltage + switching is available. + + The pad configuration state nodes are placed under the pmc node and they + are referred to by the pinctrl client properties. For more information + see Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt. + The pad name should be used as the value of the pins property in pin + configuration nodes. + + The following pads are present on Tegra124 and Tegra132 + audio, bb, cam, comp, csia, csb, cse, dsi, dsib, dsic, dsid, hdmi, hsic, + hv, lvds, mipi-bias, nand, pex-bias, pex-clk1, pex-clk2, pex-cntrl, + sdmmc1, sdmmc3, sdmmc4, sys_ddc, uart, usb0, usb1, usb2, usb_bias. + + The following pads are present on Tegra210 + audio, audio-hv, cam, csia, csib, csic, csid, csie, csif, dbg, + debug-nonao, dmic, dp, dsi, dsib, dsic, dsid, emmc, emmc2, gpio, hdmi, + hsic, lvds, mipi-bias, pex-bias, pex-clk1, pex-clk2, pex-cntrl, sdmmc1, + sdmmc3, spi, spi-hv, uart, usb0, usb1, usb2, usb3, usb-bias. + + properties: + pins: + $ref: /schemas/types.yaml#/definitions/string + description: Must contain name of the pad(s) to be configured. + + low-power-enable: + $ref: /schemas/types.yaml#/definitions/flag + description: Configure the pad into power down mode. + + low-power-disable: + $ref: /schemas/types.yaml#/definitions/flag + description: Configure the pad into active mode. + + power-source: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + Must contain either TEGRA_IO_PAD_VOLTAGE_1V8 or + TEGRA_IO_PAD_VOLTAGE_3V3 to select between signaling voltages. + The values are defined in + include/dt-bindings/pinctrl/pinctrl-tegra-io-pad.h. + Power state can be configured on all Tegra124 and Tegra132 + pads. None of the Tegra124 or Tegra132 pads support signaling + voltage switching. + All of the listed Tegra210 pads except pex-cntrl support power + state configuration. Signaling voltage switching is supported + on below Tegra210 pads. + audio, audio-hv, cam, dbg, dmic, gpio, pex-cntrl, sdmmc1, + sdmmc3, spi, spi-hv, and uart. + + required: + - pins + + additionalProperties: false + +required: + - compatible + - reg + - clock-names + - clocks + +dependencies: + "nvidia,suspend-mode": ["nvidia,core-pwr-off-time", "nvidia,cpu-pwr-off-time"] + "nvidia,core-pwr-off-time": ["nvidia,core-pwr-good-time"] + "nvidia,cpu-pwr-off-time": ["nvidia,cpu-pwr-good-time"] + +examples: + - | + + #include + #include + + tegra_pmc: pmc@7000e400 { + compatible = "nvidia,tegra210-pmc"; + reg = <0x0 0x7000e400 0x0 0x400>; + clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>; + clock-names = "pclk", "clk32k_in"; + + nvidia,invert-interrupt; + nvidia,suspend-mode = <0>; + nvidia,cpu-pwr-good-time = <0>; + nvidia,cpu-pwr-off-time = <0>; + nvidia,core-pwr-good-time = <4587 3876>; + nvidia,core-pwr-off-time = <39065>; + nvidia,core-power-req-active-high; + nvidia,sys-clock-req-active-high; + + powergates { + pd_audio: aud { + clocks = <&tegra_car TEGRA210_CLK_APE>, + <&tegra_car TEGRA210_CLK_APB2APE>; + resets = <&tegra_car 198>; + #power-domain-cells = <0>; + }; + + pd_xusbss: xusba { + clocks = <&tegra_car TEGRA210_CLK_XUSB_SS>; + resets = <&tegra_car TEGRA210_CLK_XUSB_SS>; + #power-domain-cells = <0>; + }; + }; + }; -- cgit v1.2.3 From f85fa3198dfc4e359cc6efa58854853b0824bae8 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 13 Jan 2020 23:24:12 -0800 Subject: dt-bindings: soc: tegra-pmc: Add Tegra PMC clock bindings Tegra PMC has 3 clocks clk_out_1, clk_out_2, and clk_out_3. This patch documents PMC clock bindings and adds a header defining Tegra PMC clock ids. Tested-by: Dmitry Osipenko Reviewed-by: Dmitry Osipenko Reviewed-by: Rob Herring Signed-off-by: Sowjanya Komatineni Signed-off-by: Thierry Reding --- .../devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml index 3ff34b348141..5b5c42a00264 100644 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml @@ -40,6 +40,15 @@ properties: Must contain an entry for each entry in clock-names. See ../clocks/clocks-bindings.txt for details. + '#clock-cells': + const: 1 + description: + Tegra PMC has clk_out_1, clk_out_2, and clk_out_3. + Consumer of PMC clock should specify the desired clock by having + the clock ID in its "clocks" phandle cell with pmc clock provider. + See include/dt-bindings/soc/tegra-pmc.h for the list of Tegra PMC + clock IDs. + '#interrupt-cells': const: 2 description: @@ -296,6 +305,7 @@ required: - reg - clock-names - clocks + - '#clock-cells' dependencies: "nvidia,suspend-mode": ["nvidia,core-pwr-off-time", "nvidia,cpu-pwr-off-time"] @@ -307,12 +317,14 @@ examples: #include #include + #include tegra_pmc: pmc@7000e400 { compatible = "nvidia,tegra210-pmc"; reg = <0x0 0x7000e400 0x0 0x400>; clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>; clock-names = "pclk", "clk32k_in"; + #clock-cells = <1>; nvidia,invert-interrupt; nvidia,suspend-mode = <0>; -- cgit v1.2.3 From cd88f16792011a90aa9cda12233f136a528acab3 Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Mon, 13 Jan 2020 23:24:14 -0800 Subject: dt-bindings: soc: tegra-pmc: Add ID for Tegra PMC 32 kHz blink clock Tegra PMC has blink functionality that allows 32 kHz clock out to blink pin of the Tegra. This patch adds id for this blink clock to use for enabling or disabling blink output through device tree. Tested-by: Dmitry Osipenko Reviewed-by: Dmitry Osipenko Acked-by: Rob Herring Signed-off-by: Sowjanya Komatineni Signed-off-by: Thierry Reding --- Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml index 5b5c42a00264..f17bb353f65e 100644 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.yaml @@ -44,6 +44,8 @@ properties: const: 1 description: Tegra PMC has clk_out_1, clk_out_2, and clk_out_3. + PMC also has blink control which allows 32Khz clock output to + Tegra blink pad. Consumer of PMC clock should specify the desired clock by having the clock ID in its "clocks" phandle cell with pmc clock provider. See include/dt-bindings/soc/tegra-pmc.h for the list of Tegra PMC -- cgit v1.2.3 From 305ce1dce9d8046d44a322cb3ef5a6574cf86721 Mon Sep 17 00:00:00 2001 From: Argus Lin Date: Sun, 16 Feb 2020 14:17:21 +0800 Subject: dt-bindings: pwrap: mediatek: add pwrap support for MT6779 Add binding document of pwrap for MT6779 SoCs. Signed-off-by: Argus Lin Acked-by: Rob Herring Signed-off-by: Matthias Brugger --- Documentation/devicetree/bindings/soc/mediatek/pwrap.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt b/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt index 7a32404c6114..ecac2bbeae45 100644 --- a/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt +++ b/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt @@ -20,6 +20,7 @@ Required properties in pwrap device node. - compatible: "mediatek,mt2701-pwrap" for MT2701/7623 SoCs "mediatek,mt6765-pwrap" for MT6765 SoCs + "mediatek,mt6779-pwrap" for MT6779 SoCs "mediatek,mt6797-pwrap" for MT6797 SoCs "mediatek,mt7622-pwrap" for MT7622 SoCs "mediatek,mt8135-pwrap" for MT8135 SoCs -- cgit v1.2.3 From 8b59e642d05f0ae9800b057350c063fe7debd6bc Mon Sep 17 00:00:00 2001 From: Derek Fang Date: Tue, 18 Feb 2020 21:51:52 +0800 Subject: ASoC: rt5682: Add DAI clock binding info for WCLK/BCLK CCF usage This patch describes that rt5682 can expose WCLK and BCLK clocks and how to use. Signed-off-by: Derek Fang Link: https://lore.kernel.org/r/1582033912-6841-2-git-send-email-derek.fang@realtek.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/rt5682.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/sound/rt5682.txt b/Documentation/devicetree/bindings/sound/rt5682.txt index 30e927a28369..ac98151d29e4 100644 --- a/Documentation/devicetree/bindings/sound/rt5682.txt +++ b/Documentation/devicetree/bindings/sound/rt5682.txt @@ -32,6 +32,12 @@ Optional properties: The delay time is realtek,btndet-delay value multiple of 8.192 ms. If absent, the default is 16. +- #clock-cells : Should be set to '<1>', wclk and bclk sources provided. +- clock-output-names : Name given for DAI clocks output. + +- clocks : phandle and clock specifier for codec MCLK. +- clock-names : Clock name string for 'clocks' attribute, should be "mclk". + Pins on the device (for linking into audio routes) for RT5682: * DMIC L1 @@ -53,4 +59,10 @@ rt5682 { realtek,dmic1-clk-pin = <1>; realtek,jd-src = <1>; realtek,btndet-delay = <16>; + + #clock-cells = <1>; + clock-output-names = "rt5682-dai-wclk", "rt5682-dai-bclk"; + + clocks = <&osc>; + clock-names = "mclk"; }; -- cgit v1.2.3 From 874ddbce487f077c46957e44e4115b3d82f62c92 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 19 Feb 2020 01:59:53 -0500 Subject: documentation: vm: Advertise support for pte_special in riscv Risc-V architecture has actually supported pte_special since its merge upstream, simply add this info to the documentation. Signed-off-by: Alexandre Ghiti Signed-off-by: Jonathan Corbet --- Documentation/features/vm/pte_special/arch-support.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt index 2dc5df6a1cf5..3d492a34c8ee 100644 --- a/Documentation/features/vm/pte_special/arch-support.txt +++ b/Documentation/features/vm/pte_special/arch-support.txt @@ -23,7 +23,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | ok | | sh: | ok | | sparc: | ok | -- cgit v1.2.3 From 2d5dfb5911cb0eed0a9a91ea404ad963f18e5aaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Tue, 18 Feb 2020 17:38:25 +0100 Subject: docs: arm: tcm: Fix a few typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Neuschäfer Signed-off-by: Jonathan Corbet --- Documentation/arm/tcm.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/arm/tcm.rst b/Documentation/arm/tcm.rst index effd9c7bc968..b256f9783883 100644 --- a/Documentation/arm/tcm.rst +++ b/Documentation/arm/tcm.rst @@ -4,18 +4,18 @@ ARM TCM (Tightly-Coupled Memory) handling in Linux Written by Linus Walleij -Some ARM SoC:s have a so-called TCM (Tightly-Coupled Memory). +Some ARM SoCs have a so-called TCM (Tightly-Coupled Memory). This is usually just a few (4-64) KiB of RAM inside the ARM processor. -Due to being embedded inside the CPU The TCM has a +Due to being embedded inside the CPU, the TCM has a Harvard-architecture, so there is an ITCM (instruction TCM) and a DTCM (data TCM). The DTCM can not contain any instructions, but the ITCM can actually contain data. The size of DTCM or ITCM is minimum 4KiB so the typical minimum configuration is 4KiB ITCM and 4KiB DTCM. -ARM CPU:s have special registers to read out status, physical +ARM CPUs have special registers to read out status, physical location and size of TCM memories. arch/arm/include/asm/cputype.h defines a CPUID_TCM register that you can read out from the system control coprocessor. Documentation from ARM can be found -- cgit v1.2.3 From fb2511247dc4061fd122d0195838278a4a0b7b59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Tue, 18 Feb 2020 16:02:19 +0100 Subject: docs: Fix path to MTD command line partition parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cmdlinepart.c has been moved to drivers/mtd/parsers/. Fixes: a3f12a35c91d ("mtd: parsers: Move CMDLINE parser") Signed-off-by: Jonathan Neuschäfer Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dbc22d684627..47cd55e339a5 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2791,7 +2791,7 @@ ,[,,,,] mtdparts= [MTD] - See drivers/mtd/cmdlinepart.c. + See drivers/mtd/parsers/cmdlinepart.c multitce=off [PPC] This parameter disables the use of the pSeries firmware feature for updating multiple TCE entries -- cgit v1.2.3 From f52153ab383f04a45c38d8a7f55a4249477b20df Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 11 Dec 2019 21:39:05 +0530 Subject: Documentation/ABI: Add ABI documentation for /sys/kernel/fadump_* Add missing ABI documentation for existing FADump sysfs files. Signed-off-by: Sourabh Jain Reviewed-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191211160910.21656-2-sourabhjain@linux.ibm.com --- Documentation/ABI/testing/sysfs-kernel-fadump_enabled | 7 +++++++ Documentation/ABI/testing/sysfs-kernel-fadump_registered | 8 ++++++++ Documentation/ABI/testing/sysfs-kernel-fadump_release_mem | 8 ++++++++ Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore | 7 +++++++ 4 files changed, 30 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_enabled create mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_registered create mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_release_mem create mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_enabled b/Documentation/ABI/testing/sysfs-kernel-fadump_enabled new file mode 100644 index 000000000000..f73632b1c006 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-fadump_enabled @@ -0,0 +1,7 @@ +What: /sys/kernel/fadump_enabled +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Primarily used to identify whether the FADump is enabled in + the kernel or not. +User: Kdump service diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_registered b/Documentation/ABI/testing/sysfs-kernel-fadump_registered new file mode 100644 index 000000000000..dcf925e53f0f --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-fadump_registered @@ -0,0 +1,8 @@ +What: /sys/kernel/fadump_registered +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + Helps to control the dump collect feature from userspace. + Setting 1 to this file enables the system to collect the + dump and 0 to disable it. +User: Kdump service diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_release_mem b/Documentation/ABI/testing/sysfs-kernel-fadump_release_mem new file mode 100644 index 000000000000..9c20d64ab48d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-fadump_release_mem @@ -0,0 +1,8 @@ +What: /sys/kernel/fadump_release_mem +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + This is a special sysfs file and only available when + the system is booted to capture the vmcore using FADump. + It is used to release the memory reserved by FADump to + save the crash dump. diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore b/Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore new file mode 100644 index 000000000000..53313c1d4e7a --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore @@ -0,0 +1,7 @@ +What: /sys/kernel/fadump_release_opalcore +Date: Sep 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + The sysfs file is available when the system is booted to + collect the dump on OPAL based machine. It used to release + the memory used to collect the opalcore. -- cgit v1.2.3 From d418b19f34ed0c751a69810080596f7e749595aa Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 11 Dec 2019 21:39:07 +0530 Subject: powerpc/fadump: Reorganize /sys/kernel/fadump_* sysfs files As the number of FADump sysfs files increases it is hard to manage all of them inside /sys/kernel directory. It's better to have all the FADump related sysfs files in a dedicated directory /sys/kernel/fadump. But in order to maintain backward compatibility a symlink has been added for every sysfs that has moved to new location. As the FADump sysfs files are now part of a dedicated directory there is no need to prefix their name with fadump_, hence sysfs file names are also updated. For example fadump_enabled sysfs file is now referred as enabled. Also consolidate ABI documentation for all the FADump sysfs files in a single file Documentation/ABI/testing/sysfs-kernel-fadump. Signed-off-by: Sourabh Jain Tested-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191211160910.21656-4-sourabhjain@linux.ibm.com --- Documentation/ABI/testing/sysfs-kernel-fadump | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump b/Documentation/ABI/testing/sysfs-kernel-fadump new file mode 100644 index 000000000000..5d988b919e81 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-fadump @@ -0,0 +1,33 @@ +What: /sys/kernel/fadump/* +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: + The /sys/kernel/fadump/* is a collection of FADump sysfs + file provide information about the configuration status + of Firmware Assisted Dump (FADump). + +What: /sys/kernel/fadump/enabled +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Primarily used to identify whether the FADump is enabled in + the kernel or not. +User: Kdump service + +What: /sys/kernel/fadump/registered +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + Helps to control the dump collect feature from userspace. + Setting 1 to this file enables the system to collect the + dump and 0 to disable it. +User: Kdump service + +What: /sys/kernel/fadump/release_mem +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + This is a special sysfs file and only available when + the system is booted to capture the vmcore using FADump. + It is used to release the memory reserved by FADump to + save the crash dump. -- cgit v1.2.3 From 8852c07a881b0acfd3d75cf3927adaab815c4ee5 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 11 Dec 2019 21:39:08 +0530 Subject: powerpc/powernv: Move core and fadump_release_opalcore under new kobject The /sys/firmware/opal/core and /sys/kernel/fadump_release_opalcore sysfs files are used to export and release the OPAL memory on PowerNV platform. let's organize them into a new kobject under /sys/firmware/opal/mpipl/ directory. A symlink is added to maintain the backward compatibility for /sys/firmware/opal/core sysfs file. Signed-off-by: Sourabh Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191211160910.21656-5-sourabhjain@linux.ibm.com --- .../ABI/removed/sysfs-kernel-fadump_release_opalcore | 9 +++++++++ .../ABI/testing/sysfs-kernel-fadump_release_opalcore | 7 ------- Documentation/powerpc/firmware-assisted-dump.rst | 15 +++++++++------ 3 files changed, 18 insertions(+), 13 deletions(-) create mode 100644 Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore delete mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore (limited to 'Documentation') diff --git a/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore b/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore new file mode 100644 index 000000000000..a8d46cd0f4e6 --- /dev/null +++ b/Documentation/ABI/removed/sysfs-kernel-fadump_release_opalcore @@ -0,0 +1,9 @@ +This ABI is moved to /sys/firmware/opal/mpipl/release_core. + +What: /sys/kernel/fadump_release_opalcore +Date: Sep 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + The sysfs file is available when the system is booted to + collect the dump on OPAL based machine. It used to release + the memory used to collect the opalcore. diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore b/Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore deleted file mode 100644 index 53313c1d4e7a..000000000000 --- a/Documentation/ABI/testing/sysfs-kernel-fadump_release_opalcore +++ /dev/null @@ -1,7 +0,0 @@ -What: /sys/kernel/fadump_release_opalcore -Date: Sep 2019 -Contact: linuxppc-dev@lists.ozlabs.org -Description: write only - The sysfs file is available when the system is booted to - collect the dump on OPAL based machine. It used to release - the memory used to collect the opalcore. diff --git a/Documentation/powerpc/firmware-assisted-dump.rst b/Documentation/powerpc/firmware-assisted-dump.rst index 0455a78486d5..345a3405206e 100644 --- a/Documentation/powerpc/firmware-assisted-dump.rst +++ b/Documentation/powerpc/firmware-assisted-dump.rst @@ -112,13 +112,13 @@ to ensure that crash data is preserved to process later. -- On OPAL based machines (PowerNV), if the kernel is build with CONFIG_OPAL_CORE=y, OPAL memory at the time of crash is also - exported as /sys/firmware/opal/core file. This procfs file is + exported as /sys/firmware/opal/mpipl/core file. This procfs file is helpful in debugging OPAL crashes with GDB. The kernel memory used for exporting this procfs file can be released by echo'ing - '1' to /sys/kernel/fadump_release_opalcore node. + '1' to /sys/firmware/opal/mpipl/release_core node. e.g. - # echo 1 > /sys/kernel/fadump_release_opalcore + # echo 1 > /sys/firmware/opal/mpipl/release_core Implementation details: ----------------------- @@ -283,14 +283,17 @@ Here is the list of files under kernel sysfs: enhanced to use this interface to release the memory reserved for dump and continue without 2nd reboot. - /sys/kernel/fadump_release_opalcore +Note: /sys/kernel/fadump_release_opalcore sysfs has moved to + /sys/firmware/opal/mpipl/release_core + + /sys/firmware/opal/mpipl/release_core This file is available only on OPAL based machines when FADump is active during capture kernel. This is used to release the memory - used by the kernel to export /sys/firmware/opal/core file. To + used by the kernel to export /sys/firmware/opal/mpipl/core file. To release this memory, echo '1' to it: - echo 1 > /sys/kernel/fadump_release_opalcore + echo 1 > /sys/firmware/opal/mpipl/release_core Here is the list of files under powerpc debugfs: (Assuming debugfs is mounted on /sys/kernel/debug directory.) -- cgit v1.2.3 From a3cb66a508528e9082cba8303b4f31767e7743a2 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 18 Feb 2020 13:59:16 +0100 Subject: docs: pretty up sysctl/kernel.rst This updates sysctl/kernel.rst to use ReStructured Text more fully: * the list of files is now the table of contents (old entries with no corresponding sections are added as empty sections for now); * code references and commands are formatted as code, except for function names which end up linked to the appropriate documentation; * links are used to point to other documentation and other sections; * tables are used to make lists of values more readable (as already done for some sections); * in heavily-reworked paragraphs, sentences are wrapped individually, to make future diffs easier to read. The first mention of the kernel version is dropped. The second mention, saying that the document is accurate for 2.2, is preserved for now; I will update that once the document really is accurate for a current kernel release. Signed-off-by: Stephen Kitt Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 987 ++++++++++++++-------------- 1 file changed, 492 insertions(+), 495 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index b08ba4e63291..4872610cc491 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -2,263 +2,187 @@ Documentation for /proc/sys/kernel/ =================================== -kernel version 2.2.10 - Copyright (c) 1998, 1999, Rik van Riel Copyright (c) 2009, Shen Feng -For general info and legal blurb, please look in index.rst. +For general info and legal blurb, please look in :doc:`index`. ------------------------------------------------------------------------------ This file contains documentation for the sysctl files in -/proc/sys/kernel/ and is valid for Linux kernel version 2.2. +``/proc/sys/kernel/`` and is valid for Linux kernel version 2.2. The files in this directory can be used to tune and monitor miscellaneous and general things in the operation of the Linux -kernel. Since some of the files _can_ be used to screw up your +kernel. Since some of the files *can* be used to screw up your system, it is advisable to read both documentation and source before actually making adjustments. Currently, these files might (depending on your configuration) -show up in /proc/sys/kernel: - -- acct -- acpi_video_flags -- auto_msgmni -- bootloader_type [ X86 only ] -- bootloader_version [ X86 only ] -- cap_last_cap -- core_pattern -- core_pipe_limit -- core_uses_pid -- ctrl-alt-del -- dmesg_restrict -- domainname -- hostname -- hotplug -- hardlockup_all_cpu_backtrace -- hardlockup_panic -- hung_task_panic -- hung_task_check_count -- hung_task_timeout_secs -- hung_task_check_interval_secs -- hung_task_warnings -- hyperv_record_panic_msg -- kexec_load_disabled -- kptr_restrict -- l2cr [ PPC only ] -- modprobe ==> Documentation/debugging-modules.txt -- modules_disabled -- msg_next_id [ sysv ipc ] -- msgmax -- msgmnb -- msgmni -- nmi_watchdog -- osrelease -- ostype -- overflowgid -- overflowuid -- panic -- panic_on_oops -- panic_on_stackoverflow -- panic_on_unrecovered_nmi -- panic_on_warn -- panic_print -- panic_on_rcu_stall -- perf_cpu_time_max_percent -- perf_event_paranoid -- perf_event_max_stack -- perf_event_mlock_kb -- perf_event_max_contexts_per_stack -- pid_max -- powersave-nap [ PPC only ] -- printk -- printk_delay -- printk_ratelimit -- printk_ratelimit_burst -- pty ==> Documentation/filesystems/devpts.txt -- randomize_va_space -- real-root-dev ==> Documentation/admin-guide/initrd.rst -- reboot-cmd [ SPARC only ] -- rtsig-max -- rtsig-nr -- sched_energy_aware -- seccomp/ ==> Documentation/userspace-api/seccomp_filter.rst -- sem -- sem_next_id [ sysv ipc ] -- sg-big-buff [ generic SCSI device (sg) ] -- shm_next_id [ sysv ipc ] -- shm_rmid_forced -- shmall -- shmmax [ sysv ipc ] -- shmmni -- softlockup_all_cpu_backtrace -- soft_watchdog -- stack_erasing -- stop-a [ SPARC only ] -- sysrq ==> Documentation/admin-guide/sysrq.rst -- sysctl_writes_strict -- tainted ==> Documentation/admin-guide/tainted-kernels.rst -- threads-max -- unknown_nmi_panic -- watchdog -- watchdog_thresh -- version - - -acct: -===== +show up in ``/proc/sys/kernel``: + +.. contents:: :local: + + +acct +==== + +:: -highwater lowwater frequency + highwater lowwater frequency If BSD-style process accounting is enabled these values control its behaviour. If free space on filesystem where the log lives -goes below % accounting suspends. If free space gets -above % accounting resumes. determines +goes below ``lowwater``% accounting suspends. If free space gets +above ``highwater``% accounting resumes. ``frequency`` determines how often do we check the amount of free space (value is in seconds). Default: -4 2 30 -That is, suspend accounting if there left <= 2% free; resume it -if we got >=4%; consider information about amount of free space -valid for 30 seconds. +:: -acpi_video_flags: -================= + 4 2 30 -flags +That is, suspend accounting if free space drops below 2%; resume it +if it increases to at least 4%; consider information about amount of +free space valid for 30 seconds. -See Doc*/kernel/power/video.txt, it allows mode of video boot to be -set during run time. +acpi_video_flags +================ + +See Documentation/kernel/power/video.txt, it allows mode of video boot +to be set during run time. -auto_msgmni: -============ + +auto_msgmni +=========== This variable has no effect and may be removed in future kernel releases. Reading it always returns 0. -Up to Linux 3.17, it enabled/disabled automatic recomputing of msgmni -upon memory add/remove or upon ipc namespace creation/removal. +Up to Linux 3.17, it enabled/disabled automatic recomputing of +`msgmni`_ +upon memory add/remove or upon IPC namespace creation/removal. Echoing "1" into this file enabled msgmni automatic recomputing. -Echoing "0" turned it off. auto_msgmni default value was 1. - +Echoing "0" turned it off. The default value was 1. -bootloader_type: -================ -x86 bootloader identification +bootloader_type (x86 only) +========================== This gives the bootloader type number as indicated by the bootloader, shifted left by 4, and OR'd with the low four bits of the bootloader version. The reason for this encoding is that this used to match the -type_of_loader field in the kernel header; the encoding is kept for +``type_of_loader`` field in the kernel header; the encoding is kept for backwards compatibility. That is, if the full bootloader type number is 0x15 and the full version number is 0x234, this file will contain the value 340 = 0x154. -See the type_of_loader and ext_loader_type fields in -Documentation/x86/boot.rst for additional information. - +See the ``type_of_loader`` and ``ext_loader_type`` fields in +:doc:`/x86/boot` for additional information. -bootloader_version: -=================== -x86 bootloader version +bootloader_version (x86 only) +============================= The complete bootloader version number. In the example above, this file will contain the value 564 = 0x234. -See the type_of_loader and ext_loader_ver fields in -Documentation/x86/boot.rst for additional information. +See the ``type_of_loader`` and ``ext_loader_ver`` fields in +:doc:`/x86/boot` for additional information. -cap_last_cap: -============= +cap_last_cap +============ Highest valid capability of the running kernel. Exports -CAP_LAST_CAP from the kernel. +``CAP_LAST_CAP`` from the kernel. -core_pattern: -============= +core_pattern +============ -core_pattern is used to specify a core dumpfile pattern name. +``core_pattern`` is used to specify a core dumpfile pattern name. * max length 127 characters; default value is "core" -* core_pattern is used as a pattern template for the output filename; - certain string patterns (beginning with '%') are substituted with - their actual values. -* backward compatibility with core_uses_pid: +* ``core_pattern`` is used as a pattern template for the output + filename; certain string patterns (beginning with '%') are + substituted with their actual values. +* backward compatibility with ``core_uses_pid``: - If core_pattern does not include "%p" (default does not) - and core_uses_pid is set, then .PID will be appended to + If ``core_pattern`` does not include "%p" (default does not) + and ``core_uses_pid`` is set, then .PID will be appended to the filename. -* corename format specifiers:: - - % '%' is dropped - %% output one '%' - %p pid - %P global pid (init PID namespace) - %i tid - %I global tid (init PID namespace) - %u uid (in initial user namespace) - %g gid (in initial user namespace) - %d dump mode, matches PR_SET_DUMPABLE and - /proc/sys/fs/suid_dumpable - %s signal number - %t UNIX time of dump - %h hostname - %e executable filename (may be shortened) - %E executable path - %c maximum size of core file by resource limit RLIMIT_CORE - % both are dropped +* corename format specifiers + + ======== ========================================== + % '%' is dropped + %% output one '%' + %p pid + %P global pid (init PID namespace) + %i tid + %I global tid (init PID namespace) + %u uid (in initial user namespace) + %g gid (in initial user namespace) + %d dump mode, matches ``PR_SET_DUMPABLE`` and + ``/proc/sys/fs/suid_dumpable`` + %s signal number + %t UNIX time of dump + %h hostname + %e executable filename (may be shortened) + %E executable path + %c maximum size of core file by resource limit RLIMIT_CORE + % both are dropped + ======== ========================================== * If the first character of the pattern is a '|', the kernel will treat the rest of the pattern as a command to run. The core dump will be written to the standard input of that program instead of to a file. -core_pipe_limit: -================ +core_pipe_limit +=============== -This sysctl is only applicable when core_pattern is configured to pipe -core files to a user space helper (when the first character of -core_pattern is a '|', see above). When collecting cores via a pipe -to an application, it is occasionally useful for the collecting -application to gather data about the crashing process from its -/proc/pid directory. In order to do this safely, the kernel must wait -for the collecting process to exit, so as not to remove the crashing -processes proc files prematurely. This in turn creates the -possibility that a misbehaving userspace collecting process can block -the reaping of a crashed process simply by never exiting. This sysctl -defends against that. It defines how many concurrent crashing -processes may be piped to user space applications in parallel. If -this value is exceeded, then those crashing processes above that value -are noted via the kernel log and their cores are skipped. 0 is a -special value, indicating that unlimited processes may be captured in -parallel, but that no waiting will take place (i.e. the collecting -process is not guaranteed access to /proc//). This -value defaults to 0. - - -core_uses_pid: -============== +This sysctl is only applicable when `core_pattern`_ is configured to +pipe core files to a user space helper (when the first character of +``core_pattern`` is a '|', see above). +When collecting cores via a pipe to an application, it is occasionally +useful for the collecting application to gather data about the +crashing process from its ``/proc/pid`` directory. +In order to do this safely, the kernel must wait for the collecting +process to exit, so as not to remove the crashing processes proc files +prematurely. +This in turn creates the possibility that a misbehaving userspace +collecting process can block the reaping of a crashed process simply +by never exiting. +This sysctl defends against that. +It defines how many concurrent crashing processes may be piped to user +space applications in parallel. +If this value is exceeded, then those crashing processes above that +value are noted via the kernel log and their cores are skipped. +0 is a special value, indicating that unlimited processes may be +captured in parallel, but that no waiting will take place (i.e. the +collecting process is not guaranteed access to ``/proc//``). +This value defaults to 0. + + +core_uses_pid +============= The default coredump filename is "core". By setting -core_uses_pid to 1, the coredump filename becomes core.PID. -If core_pattern does not include "%p" (default does not) -and core_uses_pid is set, then .PID will be appended to +``core_uses_pid`` to 1, the coredump filename becomes core.PID. +If `core_pattern`_ does not include "%p" (default does not) +and ``core_uses_pid`` is set, then .PID will be appended to the filename. -ctrl-alt-del: -============= +ctrl-alt-del +============ When the value in this file is 0, ctrl-alt-del is trapped and -sent to the init(1) program to handle a graceful restart. +sent to the ``init(1)`` program to handle a graceful restart. When, however, the value is > 0, Linux's reaction to a Vulcan Nerve Pinch (tm) will be an immediate reboot, without even syncing its dirty buffers. @@ -270,21 +194,22 @@ Note: to decide what to do with it. -dmesg_restrict: -=============== +dmesg_restrict +============== This toggle indicates whether unprivileged users are prevented -from using dmesg(8) to view messages from the kernel's log buffer. -When dmesg_restrict is set to (0) there are no restrictions. When -dmesg_restrict is set set to (1), users must have CAP_SYSLOG to use -dmesg(8). +from using ``dmesg(8)`` to view messages from the kernel's log +buffer. +When ``dmesg_restrict`` is set to 0 there are no restrictions. +When ``dmesg_restrict`` is set set to 1, users must have +``CAP_SYSLOG`` to use ``dmesg(8)``. -The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the -default value of dmesg_restrict. +The kernel config option ``CONFIG_SECURITY_DMESG_RESTRICT`` sets the +default value of ``dmesg_restrict``. -domainname & hostname: -====================== +domainname & hostname +===================== These files can be used to set the NIS/YP domainname and the hostname of your box in exactly the same way as the commands @@ -303,167 +228,192 @@ hostname "darkstar" and DNS (Internet Domain Name Server) domainname "frop.org", not to be confused with the NIS (Network Information Service) or YP (Yellow Pages) domainname. These two domain names are in general different. For a detailed discussion -see the hostname(1) man page. +see the ``hostname(1)`` man page. -hardlockup_all_cpu_backtrace: -============================= +hardlockup_all_cpu_backtrace +============================ This value controls the hard lockup detector behavior when a hard lockup condition is detected as to whether or not to gather further debug information. If enabled, arch-specific all-CPU stack dumping will be initiated. -0: do nothing. This is the default behavior. - -1: on detection capture more debug information. += ============================================ +0 Do nothing. This is the default behavior. +1 On detection capture more debug information. += ============================================ -hardlockup_panic: -================= +hardlockup_panic +================ This parameter can be used to control whether the kernel panics when a hard lockup is detected. - 0 - don't panic on hard lockup - 1 - panic on hard lockup += =========================== +0 Don't panic on hard lockup. +1 Panic on hard lockup. += =========================== -See Documentation/admin-guide/lockup-watchdogs.rst for more information. This can -also be set using the nmi_watchdog kernel parameter. +See :doc:`/admin-guide/lockup-watchdogs` for more information. +This can also be set using the nmi_watchdog kernel parameter. -hotplug: -======== +hotplug +======= Path for the hotplug policy agent. -Default value is "/sbin/hotplug". +Default value is "``/sbin/hotplug``". -hung_task_panic: -================ +hung_task_panic +=============== Controls the kernel's behavior when a hung task is detected. -This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. +This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled. -0: continue operation. This is the default behavior. += ================================================= +0 Continue operation. This is the default behavior. +1 Panic immediately. += ================================================= -1: panic immediately. - -hung_task_check_count: -====================== +hung_task_check_count +===================== The upper bound on the number of tasks that are checked. -This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. +This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled. -hung_task_timeout_secs: -======================= +hung_task_timeout_secs +====================== When a task in D state did not get scheduled for more than this value report a warning. -This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. +This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled. -0: means infinite timeout - no checking done. +0 means infinite timeout, no checking is done. -Possible values to set are in range {0..LONG_MAX/HZ}. +Possible values to set are in range {0:``LONG_MAX``/``HZ``}. -hung_task_check_interval_secs: -============================== +hung_task_check_interval_secs +============================= Hung task check interval. If hung task checking is enabled -(see hung_task_timeout_secs), the check is done every -hung_task_check_interval_secs seconds. -This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. +(see `hung_task_timeout_secs`_), the check is done every +``hung_task_check_interval_secs`` seconds. +This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled. -0 (default): means use hung_task_timeout_secs as checking interval. -Possible values to set are in range {0..LONG_MAX/HZ}. +0 (default) means use ``hung_task_timeout_secs`` as checking +interval. +Possible values to set are in range {0:``LONG_MAX``/``HZ``}. -hung_task_warnings: -=================== + +hung_task_warnings +================== The maximum number of warnings to report. During a check interval if a hung task is detected, this value is decreased by 1. When this value reaches 0, no more warnings will be reported. -This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. +This file shows up if ``CONFIG_DETECT_HUNG_TASK`` is enabled. -1: report an infinite number of warnings. -hyperv_record_panic_msg: -======================== +hyperv_record_panic_msg +======================= Controls whether the panic kmsg data should be reported to Hyper-V. -0: do not report panic kmsg data. += ========================================================= +0 Do not report panic kmsg data. +1 Report the panic kmsg data. This is the default behavior. += ========================================================= -1: report the panic kmsg data. This is the default behavior. +kexec_load_disabled +=================== -kexec_load_disabled: -==================== - -A toggle indicating if the kexec_load syscall has been disabled. This -value defaults to 0 (false: kexec_load enabled), but can be set to 1 -(true: kexec_load disabled). Once true, kexec can no longer be used, and -the toggle cannot be set back to false. This allows a kexec image to be -loaded before disabling the syscall, allowing a system to set up (and -later use) an image without it being altered. Generally used together -with the "modules_disabled" sysctl. +A toggle indicating if the ``kexec_load`` syscall has been disabled. +This value defaults to 0 (false: ``kexec_load`` enabled), but can be +set to 1 (true: ``kexec_load`` disabled). +Once true, kexec can no longer be used, and the toggle cannot be set +back to false. +This allows a kexec image to be loaded before disabling the syscall, +allowing a system to set up (and later use) an image without it being +altered. +Generally used together with the `modules_disabled`_ sysctl. -kptr_restrict: -============== +kptr_restrict +============= This toggle indicates whether restrictions are placed on -exposing kernel addresses via /proc and other interfaces. - -When kptr_restrict is set to 0 (the default) the address is hashed before -printing. (This is the equivalent to %p.) +exposing kernel addresses via ``/proc`` and other interfaces. + +When ``kptr_restrict`` is set to 0 (the default) the address is hashed +before printing. +(This is the equivalent to %p.) + +When ``kptr_restrict`` is set to 1, kernel pointers printed using the +%pK format specifier will be replaced with 0s unless the user has +``CAP_SYSLOG`` and effective user and group ids are equal to the real +ids. +This is because %pK checks are done at read() time rather than open() +time, so if permissions are elevated between the open() and the read() +(e.g via a setuid binary) then %pK will not leak kernel pointers to +unprivileged users. +Note, this is a temporary solution only. +The correct long-term solution is to do the permission checks at +open() time. +Consider removing world read permissions from files that use %pK, and +using `dmesg_restrict`_ to protect against uses of %pK in ``dmesg(8)`` +if leaking kernel pointer values to unprivileged users is a concern. + +When ``kptr_restrict`` is set to 2, kernel pointers printed using +%pK will be replaced with 0s regardless of privileges. + + +l2cr (PPC only) +=============== -When kptr_restrict is set to (1), kernel pointers printed using the %pK -format specifier will be replaced with 0's unless the user has CAP_SYSLOG -and effective user and group ids are equal to the real ids. This is -because %pK checks are done at read() time rather than open() time, so -if permissions are elevated between the open() and the read() (e.g via -a setuid binary) then %pK will not leak kernel pointers to unprivileged -users. Note, this is a temporary solution only. The correct long-term -solution is to do the permission checks at open() time. Consider removing -world read permissions from files that use %pK, and using dmesg_restrict -to protect against uses of %pK in dmesg(8) if leaking kernel pointer -values to unprivileged users is a concern. +This flag controls the L2 cache of G3 processor boards. If +0, the cache is disabled. Enabled if nonzero. -When kptr_restrict is set to (2), kernel pointers printed using -%pK will be replaced with 0's regardless of privileges. +modprobe +======== -l2cr: (PPC only) -================ - -This flag controls the L2 cache of G3 processor boards. If -0, the cache is disabled. Enabled if nonzero. +See Documentation/debugging-modules.txt. -modules_disabled: -================= +modules_disabled +================ A toggle value indicating if modules are allowed to be loaded in an otherwise modular kernel. This toggle defaults to off (0), but can be set true (1). Once true, modules can be neither loaded nor unloaded, and the toggle cannot be set back -to false. Generally used with the "kexec_load_disabled" toggle. +to false. Generally used with the `kexec_load_disabled`_ toggle. + +.. _msgmni: -msg_next_id, sem_next_id, and shm_next_id: -========================================== +msgmax, msgmnb, and msgmni +========================== + + +msg_next_id, sem_next_id, and shm_next_id (System V IPC) +======================================================== These three toggles allows to specify desired id for next allocated IPC object: message, semaphore or shared memory respectively. By default they are equal to -1, which means generic allocation logic. -Possible values to set are in range {0..INT_MAX}. +Possible values to set are in range {0:``INT_MAX``}. Notes: 1) kernel doesn't guarantee, that new object will have desired id. So, @@ -473,15 +423,16 @@ Notes: fails, it is undefined if the value remains unmodified or is reset to -1. -nmi_watchdog: -============= +nmi_watchdog +============ This parameter can be used to control the NMI watchdog (i.e. the hard lockup detector) on x86 systems. -0 - disable the hard lockup detector - -1 - enable the hard lockup detector += ================================= +0 Disable the hard lockup detector. +1 Enable the hard lockup detector. += ================================= The hard lockup detector monitors each CPU for its ability to respond to timer interrupts. The mechanism utilizes CPU performance counter registers @@ -493,11 +444,11 @@ in a KVM virtual machine. This default can be overridden by adding:: nmi_watchdog=1 -to the guest kernel command line (see Documentation/admin-guide/kernel-parameters.rst). +to the guest kernel command line (see :doc:`/admin-guide/kernel-parameters`). -numa_balancing: -=============== +numa_balancing +============== Enables/disables automatic page fault based NUMA memory balancing. Memory is moved automatically to nodes @@ -515,9 +466,10 @@ ideally is offset by improved memory locality but there is no universal guarantee. If the target workload is already bound to NUMA nodes then this feature should be disabled. Otherwise, if the system overhead from the feature is too high then the rate the kernel samples for NUMA hinting -faults may be controlled by the numa_balancing_scan_period_min_ms, +faults may be controlled by the `numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, -numa_balancing_scan_size_mb, and numa_balancing_settle_count sysctls. +numa_balancing_scan_size_mb`_, and numa_balancing_settle_count sysctls. + numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb =============================================================================================================================== @@ -543,23 +495,23 @@ workload pattern changes and minimises performance impact due to remote memory accesses. These sysctls control the thresholds for scan delays and the number of pages scanned. -numa_balancing_scan_period_min_ms is the minimum time in milliseconds to +``numa_balancing_scan_period_min_ms`` is the minimum time in milliseconds to scan a tasks virtual memory. It effectively controls the maximum scanning rate for each task. -numa_balancing_scan_delay_ms is the starting "scan delay" used for a task +``numa_balancing_scan_delay_ms`` is the starting "scan delay" used for a task when it initially forks. -numa_balancing_scan_period_max_ms is the maximum time in milliseconds to +``numa_balancing_scan_period_max_ms`` is the maximum time in milliseconds to scan a tasks virtual memory. It effectively controls the minimum scanning rate for each task. -numa_balancing_scan_size_mb is how many megabytes worth of pages are +``numa_balancing_scan_size_mb`` is how many megabytes worth of pages are scanned for a given scan. -osrelease, ostype & version: -============================ +osrelease, ostype & version +=========================== :: @@ -570,15 +522,16 @@ osrelease, ostype & version: # cat version #5 Wed Feb 25 21:49:24 MET 1998 -The files osrelease and ostype should be clear enough. Version +The files ``osrelease`` and ``ostype`` should be clear enough. +``version`` needs a little more clarification however. The '#5' means that this is the fifth kernel built from this source base and the date behind it indicates the time the kernel was built. The only way to tune these values is to rebuild the kernel :-) -overflowgid & overflowuid: -========================== +overflowgid & overflowuid +========================= if your architecture did not always support 32-bit UIDs (i.e. arm, i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to @@ -589,108 +542,113 @@ These sysctls allow you to change the value of the fixed UID and GID. The default is 65534. -panic: -====== +panic +===== The value in this file represents the number of seconds the kernel waits before rebooting on a panic. When you use the software watchdog, the recommended setting is 60. -panic_on_io_nmi: -================ +panic_on_io_nmi +=============== Controls the kernel's behavior when a CPU receives an NMI caused by an IO error. -0: try to continue operation (default) - -1: panic immediately. The IO error triggered an NMI. This indicates a - serious system condition which could result in IO data corruption. - Rather than continuing, panicking might be a better choice. Some - servers issue this sort of NMI when the dump button is pushed, - and you can use this option to take a crash dump. += ================================================================== +0 Try to continue operation (default). +1 Panic immediately. The IO error triggered an NMI. This indicates a + serious system condition which could result in IO data corruption. + Rather than continuing, panicking might be a better choice. Some + servers issue this sort of NMI when the dump button is pushed, + and you can use this option to take a crash dump. += ================================================================== -panic_on_oops: -============== +panic_on_oops +============= Controls the kernel's behaviour when an oops or BUG is encountered. -0: try to continue operation - -1: panic immediately. If the `panic` sysctl is also non-zero then the - machine will be rebooted. += =================================================================== +0 Try to continue operation. +1 Panic immediately. If the `panic` sysctl is also non-zero then the + machine will be rebooted. += =================================================================== -panic_on_stackoverflow: -======================= +panic_on_stackoverflow +====================== Controls the kernel's behavior when detecting the overflows of kernel, IRQ and exception stacks except a user stack. -This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled. - -0: try to continue operation. +This file shows up if ``CONFIG_DEBUG_STACKOVERFLOW`` is enabled. -1: panic immediately. += ========================== +0 Try to continue operation. +1 Panic immediately. += ========================== -panic_on_unrecovered_nmi: -========================= +panic_on_unrecovered_nmi +======================== The default Linux behaviour on an NMI of either memory or unknown is to continue operation. For many environments such as scientific computing it is preferable that the box is taken out and the error dealt with than an uncorrected parity/ECC error get propagated. -A small number of systems do generate NMI's for bizarre random reasons +A small number of systems do generate NMIs for bizarre random reasons such as power management so the default is off. That sysctl works like the existing panic controls already in that directory. -panic_on_warn: -============== +panic_on_warn +============= Calls panic() in the WARN() path when set to 1. This is useful to avoid a kernel rebuild when attempting to kdump at the location of a WARN(). -0: only WARN(), default behaviour. - -1: call panic() after printing out WARN() location. += ================================================ +0 Only WARN(), default behaviour. +1 Call panic() after printing out WARN() location. += ================================================ -panic_print: -============ +panic_print +=========== Bitmask for printing system info when panic happens. User can chose combination of the following bits: -===== ======================================== +===== ============================================ bit 0 print all tasks info bit 1 print system memory info bit 2 print timer info -bit 3 print locks info if CONFIG_LOCKDEP is on +bit 3 print locks info if ``CONFIG_LOCKDEP`` is on bit 4 print ftrace buffer -===== ======================================== +===== ============================================ So for example to print tasks and memory info on panic, user can:: echo 3 > /proc/sys/kernel/panic_print -panic_on_rcu_stall: -=================== +panic_on_rcu_stall +================== When set to 1, calls panic() after RCU stall detection messages. This is useful to define the root cause of RCU stalls using a vmcore. -0: do not panic() when RCU stall takes place, default behavior. - -1: panic() after printing RCU stall messages. += ============================================================ +0 Do not panic() when RCU stall takes place, default behavior. +1 panic() after printing RCU stall messages. += ============================================================ -perf_cpu_time_max_percent: -========================== +perf_cpu_time_max_percent +========================= Hints to the kernel how much CPU time it should be allowed to use to handle perf sampling events. If the perf subsystem @@ -703,171 +661,179 @@ unexpectedly take too long to execute, the NMIs can become stacked up next to each other so much that nothing else is allowed to execute. -0: - disable the mechanism. Do not monitor or correct perf's - sampling rate no matter how CPU time it takes. +===== ======================================================== +0 Disable the mechanism. Do not monitor or correct perf's + sampling rate no matter how CPU time it takes. -1-100: - attempt to throttle perf's sample rate to this - percentage of CPU. Note: the kernel calculates an - "expected" length of each sample event. 100 here means - 100% of that expected length. Even if this is set to - 100, you may still see sample throttling if this - length is exceeded. Set to 0 if you truly do not care - how much CPU is consumed. +1-100 Attempt to throttle perf's sample rate to this + percentage of CPU. Note: the kernel calculates an + "expected" length of each sample event. 100 here means + 100% of that expected length. Even if this is set to + 100, you may still see sample throttling if this + length is exceeded. Set to 0 if you truly do not care + how much CPU is consumed. +===== ======================================================== -perf_event_paranoid: -==================== +perf_event_paranoid +=================== Controls use of the performance events system by unprivileged users (without CAP_SYS_ADMIN). The default value is 2. === ================================================================== - -1 Allow use of (almost) all events by all users + -1 Allow use of (almost) all events by all users. - Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK + Ignore mlock limit after perf_event_mlock_kb without + ``CAP_IPC_LOCK``. ->=0 Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN +>=0 Disallow ftrace function tracepoint by users without + ``CAP_SYS_ADMIN``. - Disallow raw tracepoint access by users without CAP_SYS_ADMIN + Disallow raw tracepoint access by users without ``CAP_SYS_ADMIN``. ->=1 Disallow CPU event access by users without CAP_SYS_ADMIN +>=1 Disallow CPU event access by users without ``CAP_SYS_ADMIN``. ->=2 Disallow kernel profiling by users without CAP_SYS_ADMIN +>=2 Disallow kernel profiling by users without ``CAP_SYS_ADMIN``. === ================================================================== -perf_event_max_stack: -===================== +perf_event_max_stack +==================== -Controls maximum number of stack frames to copy for (attr.sample_type & -PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using -'perf record -g' or 'perf trace --call-graph fp'. +Controls maximum number of stack frames to copy for (``attr.sample_type & +PERF_SAMPLE_CALLCHAIN``) configured events, for instance, when using +'``perf record -g``' or '``perf trace --call-graph fp``'. This can only be done when no events are in use that have callchains -enabled, otherwise writing to this file will return -EBUSY. +enabled, otherwise writing to this file will return ``-EBUSY``. The default value is 127. -perf_event_mlock_kb: -==================== +perf_event_mlock_kb +=================== Control size of per-cpu ring buffer not counted agains mlock limit. The default value is 512 + 1 page -perf_event_max_contexts_per_stack: -================================== +perf_event_max_contexts_per_stack +================================= Controls maximum number of stack frame context entries for -(attr.sample_type & PERF_SAMPLE_CALLCHAIN) configured events, for -instance, when using 'perf record -g' or 'perf trace --call-graph fp'. +(``attr.sample_type & PERF_SAMPLE_CALLCHAIN``) configured events, for +instance, when using '``perf record -g``' or '``perf trace --call-graph fp``'. This can only be done when no events are in use that have callchains -enabled, otherwise writing to this file will return -EBUSY. +enabled, otherwise writing to this file will return ``-EBUSY``. The default value is 8. -pid_max: -======== +pid_max +======= PID allocation wrap value. When the kernel's next PID value reaches this value, it wraps back to a minimum PID value. -PIDs of value pid_max or larger are not allocated. +PIDs of value ``pid_max`` or larger are not allocated. -ns_last_pid: -============ +ns_last_pid +=========== The last pid allocated in the current (the one task using this sysctl lives in) pid namespace. When selecting a pid for a next task on fork kernel tries to allocate a number starting from this one. -powersave-nap: (PPC only) -========================= +powersave-nap (PPC only) +======================== If set, Linux-PPC will use the 'nap' mode of powersaving, otherwise the 'doze' mode will be used. + ============================================================== -printk: -======= +printk +====== -The four values in printk denote: console_loglevel, -default_message_loglevel, minimum_console_loglevel and -default_console_loglevel respectively. +The four values in printk denote: ``console_loglevel``, +``default_message_loglevel``, ``minimum_console_loglevel`` and +``default_console_loglevel`` respectively. These values influence printk() behavior when printing or -logging error messages. See 'man 2 syslog' for more info on +logging error messages. See '``man 2 syslog``' for more info on the different loglevels. -- console_loglevel: - messages with a higher priority than - this will be printed to the console -- default_message_loglevel: - messages without an explicit priority - will be printed with this priority -- minimum_console_loglevel: - minimum (highest) value to which - console_loglevel can be set -- default_console_loglevel: - default value for console_loglevel +======================== ===================================== +console_loglevel messages with a higher priority than + this will be printed to the console +default_message_loglevel messages without an explicit priority + will be printed with this priority +minimum_console_loglevel minimum (highest) value to which + console_loglevel can be set +default_console_loglevel default value for console_loglevel +======================== ===================================== -printk_delay: -============= +printk_delay +============ -Delay each printk message in printk_delay milliseconds +Delay each printk message in ``printk_delay`` milliseconds Value from 0 - 10000 is allowed. -printk_ratelimit: -================= +printk_ratelimit +================ -Some warning messages are rate limited. printk_ratelimit specifies +Some warning messages are rate limited. ``printk_ratelimit`` specifies the minimum length of time between these messages (in seconds). The default value is 5 seconds. A value of 0 will disable rate limiting. -printk_ratelimit_burst: -======================= +printk_ratelimit_burst +====================== -While long term we enforce one message per printk_ratelimit +While long term we enforce one message per `printk_ratelimit`_ seconds, we do allow a burst of messages to pass through. -printk_ratelimit_burst specifies the number of messages we can +``printk_ratelimit_burst`` specifies the number of messages we can send before ratelimiting kicks in. The default value is 10 messages. -printk_devkmsg: -=============== - -Control the logging to /dev/kmsg from userspace: - -ratelimit: - default, ratelimited +printk_devkmsg +============== -on: unlimited logging to /dev/kmsg from userspace +Control the logging to ``/dev/kmsg`` from userspace: -off: logging to /dev/kmsg disabled +========= ============================================= +ratelimit default, ratelimited +on unlimited logging to /dev/kmsg from userspace +off logging to /dev/kmsg disabled +========= ============================================= -The kernel command line parameter printk.devkmsg= overrides this and is +The kernel command line parameter ``printk.devkmsg=`` overrides this and is a one-time setting until next reboot: once set, it cannot be changed by this sysctl interface anymore. +============================================================== -randomize_va_space: -=================== + +pty +=== + +See Documentation/filesystems/devpts.txt. + + +randomize_va_space +================== This option can be used to select the type of process address space randomization that is used in the system, for architectures @@ -882,10 +848,10 @@ that support this feature. This, among other things, implies that shared libraries will be loaded to random addresses. Also for PIE-linked binaries, the location of code start is randomized. This is the default if the - CONFIG_COMPAT_BRK option is enabled. + ``CONFIG_COMPAT_BRK`` option is enabled. 2 Additionally enable heap randomization. This is the default if - CONFIG_COMPAT_BRK is disabled. + ``CONFIG_COMPAT_BRK`` is disabled. There are a few legacy applications out there (such as some ancient versions of libc.so.5 from 1996) that assume that brk area starts @@ -895,21 +861,27 @@ that support this feature. systems it is safe to choose full randomization. Systems with ancient and/or broken binaries should be configured - with CONFIG_COMPAT_BRK enabled, which excludes the heap from process + with ``CONFIG_COMPAT_BRK`` enabled, which excludes the heap from process address space randomization. == =========================================================================== -reboot-cmd: (Sparc only) -======================== +real-root-dev +============= + +See :doc:`/admin-guide/initrd`. + + +reboot-cmd (SPARC only) +======================= ??? This seems to be a way to give an argument to the Sparc ROM/Flash boot loader. Maybe to tell it what to do after rebooting. ??? -rtsig-max & rtsig-nr: -===================== +rtsig-max & rtsig-nr +==================== The file rtsig-max can be used to tune the maximum number of POSIX realtime (queued) signals that can be outstanding @@ -918,8 +890,8 @@ in the system. rtsig-nr shows the number of RT signals currently queued. -sched_energy_aware: -=================== +sched_energy_aware +================== Enables/disables Energy Aware Scheduling (EAS). EAS starts automatically on platforms where it can run (that is, @@ -929,75 +901,85 @@ requirements for EAS but you do not want to use it, change this value to 0. -sched_schedstats: -================= +sched_schedstats +================ Enables/disables scheduler statistics. Enabling this feature incurs a small amount of overhead in the scheduler but is useful for debugging and performance tuning. -sg-big-buff: -============ +seccomp +======= + +See :doc:`/userspace-api/seccomp_filter`. + + +sg-big-buff +=========== This file shows the size of the generic SCSI (sg) buffer. You can't tune it just yet, but you could change it on -compile time by editing include/scsi/sg.h and changing -the value of SG_BIG_BUFF. +compile time by editing ``include/scsi/sg.h`` and changing +the value of ``SG_BIG_BUFF``. There shouldn't be any reason to change this value. If you can come up with one, you probably know what you are doing anyway :) -shmall: -======= +shmall +====== This parameter sets the total amount of shared memory pages that -can be used system wide. Hence, SHMALL should always be at least -ceil(shmmax/PAGE_SIZE). +can be used system wide. Hence, ``shmall`` should always be at least +``ceil(shmmax/PAGE_SIZE)``. -If you are not sure what the default PAGE_SIZE is on your Linux -system, you can run the following command: +If you are not sure what the default ``PAGE_SIZE`` is on your Linux +system, you can run the following command:: # getconf PAGE_SIZE -shmmax: -======= +shmmax +====== This value can be used to query and set the run time limit on the maximum shared memory segment size that can be created. Shared memory segments up to 1Gb are now supported in the -kernel. This value defaults to SHMMAX. +kernel. This value defaults to ``SHMMAX``. -shm_rmid_forced: -================ +shmmni +====== + + +shm_rmid_forced +=============== Linux lets you set resource limits, including how much memory one -process can consume, via setrlimit(2). Unfortunately, shared memory +process can consume, via ``setrlimit(2)``. Unfortunately, shared memory segments are allowed to exist without association with any process, and thus might not be counted against any resource limits. If enabled, shared memory segments are automatically destroyed when their attach count becomes zero after a detach or a process termination. It will also destroy segments that were created, but never attached to, on exit -from the process. The only use left for IPC_RMID is to immediately +from the process. The only use left for ``IPC_RMID`` is to immediately destroy an unattached segment. Of course, this breaks the way things are defined, so some applications might stop working. Note that this feature will do you no good unless you also configure your resource -limits (in particular, RLIMIT_AS and RLIMIT_NPROC). Most systems don't +limits (in particular, ``RLIMIT_AS`` and ``RLIMIT_NPROC``). Most systems don't need this. Note that if you change this from 0 to 1, already created segments without users and with a dead originative process will be destroyed. -sysctl_writes_strict: -===================== +sysctl_writes_strict +==================== Control how file position affects the behavior of updating sysctl values -via the /proc/sys interface: +via the ``/proc/sys`` interface: == ====================================================================== -1 Legacy per-write sysctl value handling, with no printk warnings. @@ -1014,8 +996,8 @@ via the /proc/sys interface: == ====================================================================== -softlockup_all_cpu_backtrace: -============================= +softlockup_all_cpu_backtrace +============================ This value controls the soft lockup detector thread's behavior when a soft lockup condition is detected as to whether or not @@ -1025,43 +1007,56 @@ be issued an NMI and instructed to capture stack trace. This feature is only applicable for architectures which support NMI. -0: do nothing. This is the default behavior. - -1: on detection capture more debug information. += ============================================ +0 Do nothing. This is the default behavior. +1 On detection capture more debug information. += ============================================ -soft_watchdog: -============== +soft_watchdog +============= This parameter can be used to control the soft lockup detector. - 0 - disable the soft lockup detector - - 1 - enable the soft lockup detector += ================================= +0 Disable the soft lockup detector. +1 Enable the soft lockup detector. += ================================= The soft lockup detector monitors CPUs for threads that are hogging the CPUs without rescheduling voluntarily, and thus prevent the 'watchdog/N' threads from running. The mechanism depends on the CPUs ability to respond to timer interrupts which are needed for the 'watchdog/N' threads to be woken up by -the watchdog timer function, otherwise the NMI watchdog - if enabled - can +the watchdog timer function, otherwise the NMI watchdog — if enabled — can detect a hard lockup condition. -stack_erasing: -============== +stack_erasing +============= This parameter can be used to control kernel stack erasing at the end -of syscalls for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK. +of syscalls for kernels built with ``CONFIG_GCC_PLUGIN_STACKLEAK``. That erasing reduces the information which kernel stack leak bugs can reveal and blocks some uninitialized stack variable attacks. The tradeoff is the performance impact: on a single CPU system kernel compilation sees a 1% slowdown, other systems and workloads may vary. - 0: kernel stack erasing is disabled, STACKLEAK_METRICS are not updated. += ==================================================================== +0 Kernel stack erasing is disabled, STACKLEAK_METRICS are not updated. +1 Kernel stack erasing is enabled (default), it is performed before + returning to the userspace at the end of syscalls. += ==================================================================== + + +stop-a (SPARC only) +=================== - 1: kernel stack erasing is enabled (default), it is performed before - returning to the userspace at the end of syscalls. + +sysrq +===== + +See :doc:`/admin-guide/sysrq`. tainted @@ -1091,30 +1086,30 @@ ORed together. The letters are seen in "Tainted" line of Oops reports. 131072 `(T)` The kernel was built with the struct randomization plugin ====== ===== ============================================================== -See Documentation/admin-guide/tainted-kernels.rst for more information. +See :doc:`/admin-guide/tainted-kernels` for more information. -threads-max: -============ +threads-max +=========== This value controls the maximum number of threads that can be created -using fork(). +using ``fork()``. During initialization the kernel sets this value such that even if the maximum number of threads is created, the thread structures occupy only a part (1/8th) of the available RAM pages. -The minimum value that can be written to threads-max is 1. +The minimum value that can be written to ``threads-max`` is 1. -The maximum value that can be written to threads-max is given by the -constant FUTEX_TID_MASK (0x3fffffff). +The maximum value that can be written to ``threads-max`` is given by the +constant ``FUTEX_TID_MASK`` (0x3fffffff). -If a value outside of this range is written to threads-max an error -EINVAL occurs. +If a value outside of this range is written to ``threads-max`` an +``EINVAL`` error occurs. -unknown_nmi_panic: -================== +unknown_nmi_panic +================= The value in this file affects behavior of handling NMI. When the value is non-zero, unknown NMI is trapped and then panic occurs. At @@ -1124,37 +1119,39 @@ NMI switch that most IA32 servers have fires unknown NMI up, for example. If a system hangs up, try pressing the NMI switch. -watchdog: -========= +watchdog +======== This parameter can be used to disable or enable the soft lockup detector -_and_ the NMI watchdog (i.e. the hard lockup detector) at the same time. +*and* the NMI watchdog (i.e. the hard lockup detector) at the same time. - 0 - disable both lockup detectors - - 1 - enable both lockup detectors += ============================== +0 Disable both lockup detectors. +1 Enable both lockup detectors. += ============================== The soft lockup detector and the NMI watchdog can also be disabled or -enabled individually, using the soft_watchdog and nmi_watchdog parameters. -If the watchdog parameter is read, for example by executing:: +enabled individually, using the ``soft_watchdog`` and ``nmi_watchdog`` +parameters. +If the ``watchdog`` parameter is read, for example by executing:: cat /proc/sys/kernel/watchdog -the output of this command (0 or 1) shows the logical OR of soft_watchdog -and nmi_watchdog. +the output of this command (0 or 1) shows the logical OR of +``soft_watchdog`` and ``nmi_watchdog``. -watchdog_cpumask: -================= +watchdog_cpumask +================ This value can be used to control on which cpus the watchdog may run. -The default cpumask is all possible cores, but if NO_HZ_FULL is +The default cpumask is all possible cores, but if ``NO_HZ_FULL`` is enabled in the kernel config, and cores are specified with the -nohz_full= boot argument, those cores are excluded by default. +``nohz_full=`` boot argument, those cores are excluded by default. Offline cores can be included in this mask, and if the core is later brought online, the watchdog will be started based on the mask value. -Typically this value would only be touched in the nohz_full case +Typically this value would only be touched in the ``nohz_full`` case to re-enable cores that by default were not running the watchdog, if a kernel lockup was suspected on those cores. @@ -1165,12 +1162,12 @@ might say:: echo 0,2-4 > /proc/sys/kernel/watchdog_cpumask -watchdog_thresh: -================ +watchdog_thresh +=============== This value can be used to control the frequency of hrtimer and NMI events and the soft and hard lockup thresholds. The default threshold is 10 seconds. -The softlockup threshold is (2 * watchdog_thresh). Setting this +The softlockup threshold is (``2 * watchdog_thresh``). Setting this tunable to zero will disable lockup detection altogether. -- cgit v1.2.3 From 0317c5371e6a9b71a2e25b47013dd5c62d55d1a6 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 18 Feb 2020 13:59:17 +0100 Subject: docs: merge debugging-modules.txt into sysctl/kernel.rst This fits nicely in sysctl/kernel.rst, merge it (and rephrase it) instead of linking to it. Signed-off-by: Stephen Kitt Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 14 +++++++++++++- Documentation/debugging-modules.txt | 22 ---------------------- 2 files changed, 13 insertions(+), 23 deletions(-) delete mode 100644 Documentation/debugging-modules.txt (limited to 'Documentation') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 4872610cc491..bb56ff25d947 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -387,7 +387,19 @@ This flag controls the L2 cache of G3 processor boards. If modprobe ======== -See Documentation/debugging-modules.txt. +This gives the full path of the modprobe command which the kernel will +use to load modules. This can be used to debug module loading +requests:: + + echo '#! /bin/sh' > /tmp/modprobe + echo 'echo "$@" >> /tmp/modprobe.log' >> /tmp/modprobe + echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe + chmod a+x /tmp/modprobe + echo /tmp/modprobe > /proc/sys/kernel/modprobe + +This only applies when the *kernel* is requesting that the module be +loaded; it won't have any effect if the module is being loaded +explicitly using ``modprobe`` from userspace. modules_disabled diff --git a/Documentation/debugging-modules.txt b/Documentation/debugging-modules.txt deleted file mode 100644 index 172ad4aec493..000000000000 --- a/Documentation/debugging-modules.txt +++ /dev/null @@ -1,22 +0,0 @@ -Debugging Modules after 2.6.3 ------------------------------ - -In almost all distributions, the kernel asks for modules which don't -exist, such as "net-pf-10" or whatever. Changing "modprobe -q" to -"succeed" in this case is hacky and breaks some setups, and also we -want to know if it failed for the fallback code for old aliases in -fs/char_dev.c, for example. - -In the past a debugging message which would fill people's logs was -emitted. This debugging message has been removed. The correct way -of debugging module problems is something like this: - -echo '#! /bin/sh' > /tmp/modprobe -echo 'echo "$@" >> /tmp/modprobe.log' >> /tmp/modprobe -echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe -chmod a+x /tmp/modprobe -echo /tmp/modprobe > /proc/sys/kernel/modprobe - -Note that the above applies only when the *kernel* is requesting -that the module be loaded -- it won't have any effect if that module -is being loaded explicitly using "modprobe" from userspace. -- cgit v1.2.3 From a474105bb6a6fe85ea30d7fe0a087184da32c751 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 18 Feb 2020 13:59:18 +0100 Subject: docs: drop l2cr from sysctl/kernel.rst The l2cr sysctl entry was removed in commit c2f3dabefa73 ("sysctl: kill binary sysctl KERN_PPC_L2CR"), this removes the corresponding documentation. Signed-off-by: Stephen Kitt Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 7 ------- 1 file changed, 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index bb56ff25d947..99569a26f93e 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -377,13 +377,6 @@ When ``kptr_restrict`` is set to 2, kernel pointers printed using %pK will be replaced with 0s regardless of privileges. -l2cr (PPC only) -=============== - -This flag controls the L2 cache of G3 processor boards. If -0, the cache is disabled. Enabled if nonzero. - - modprobe ======== -- cgit v1.2.3 From fa5b526411bb5afe7736ce14bab18c0b68db4251 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 18 Feb 2020 13:59:19 +0100 Subject: docs: add missing IPC documentation in sysctl/kernel.rst This adds short descriptions of msgmax, msgmnb, msgmni, and shmmni, which were previously listed in kernel.rst but not described. Signed-off-by: Stephen Kitt Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 99569a26f93e..0ae52156db75 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -410,6 +410,15 @@ to false. Generally used with the `kexec_load_disabled`_ toggle. msgmax, msgmnb, and msgmni ========================== +``msgmax`` is the maximum size of an IPC message, in bytes. 8192 by +default (``MSGMAX``). + +``msgmnb`` is the maximum size of an IPC queue, in bytes. 16384 by +default (``MSGMNB``). + +``msgmni`` is the maximum number of IPC queues. 32000 by default +(``MSGMNI``). + msg_next_id, sem_next_id, and shm_next_id (System V IPC) ======================================================== @@ -958,6 +967,9 @@ kernel. This value defaults to ``SHMMAX``. shmmni ====== +This value determines the maximum number of shared memory segments. +4096 by default (``SHMMNI``). + shm_rmid_forced =============== -- cgit v1.2.3 From a1ad4f15054b58636aa58f0df2961259f8781746 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 18 Feb 2020 13:59:20 +0100 Subject: docs: document stop-a in sysctl/kernel.rst This describes the SPARC-specific stop-a sysctl entry, which was previously listed in kernel.rst but not documented. Base on the implementation in arch/sparc/kernel/setup_{32,64}.c and kernel/panic.c. Signed-off-by: Stephen Kitt Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 0ae52156db75..3cbbe4502e18 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1069,6 +1069,16 @@ compilation sees a 1% slowdown, other systems and workloads may vary. stop-a (SPARC only) =================== +Controls Stop-A: + += ==================================== +0 Stop-A has no effect. +1 Stop-A breaks to the PROM (default). += ==================================== + +Stop-A is always enabled on a panic, so that the user can return to +the boot PROM. + sysrq ===== -- cgit v1.2.3 From 404347e68aeb81b89dc440135ed23fcabff104f9 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 18 Feb 2020 13:59:21 +0100 Subject: docs: document panic fully in sysctl/kernel.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The description of panic doesn’t cover all the supported scenarios; this patch fixes that, describing the three possibilities (no reboot, immediate reboot, reboot after a delay). Based on the implementation in kernel/panic.c. Signed-off-by: Stephen Kitt Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 3cbbe4502e18..60c97a79ff26 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -559,9 +559,15 @@ The default is 65534. panic ===== -The value in this file represents the number of seconds the kernel -waits before rebooting on a panic. When you use the software watchdog, -the recommended setting is 60. +The value in this file determines the behaviour of the kernel on a +panic: + +* if zero, the kernel will loop forever; +* if negative, the kernel will reboot immediately; +* if positive, the kernel will reboot after the corresponding number + of seconds. + +When you use the software watchdog, the recommended setting is 60. panic_on_io_nmi -- cgit v1.2.3 From 8f21f54b8a9517e0213948088aca757a0f122447 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 18 Feb 2020 13:59:23 +0100 Subject: docs: sysctl/kernel: remove rtsig entries These have no corresponding code in the kernel. Signed-off-by: Stephen Kitt Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 60c97a79ff26..6c0d8c55101c 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -900,16 +900,6 @@ ROM/Flash boot loader. Maybe to tell it what to do after rebooting. ??? -rtsig-max & rtsig-nr -==================== - -The file rtsig-max can be used to tune the maximum number -of POSIX realtime (queued) signals that can be outstanding -in the system. - -rtsig-nr shows the number of RT signals currently queued. - - sched_energy_aware ================== -- cgit v1.2.3 From dff2c2e69f308c1c7d296d49d2b0467e9675b58e Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Tue, 18 Feb 2020 15:10:13 +0530 Subject: Replace dead urls with active urls for Mutt This patch replace stale/dead urls with active urls for Mutt. Signed-off-by: Bhaskar Chowdhury Signed-off-by: Jonathan Corbet --- Documentation/process/email-clients.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/process/email-clients.rst b/Documentation/process/email-clients.rst index 5273d06c8ff6..c9e4ce2613c0 100644 --- a/Documentation/process/email-clients.rst +++ b/Documentation/process/email-clients.rst @@ -237,9 +237,9 @@ using Mutt to send patches through Gmail:: The Mutt docs have lots more information: - http://dev.mutt.org/trac/wiki/UseCases/Gmail + https://gitlab.com/muttmua/mutt/-/wikis/UseCases/Gmail - http://dev.mutt.org/doc/manual.html + http://www.mutt.org/doc/manual/ Pine (TUI) ********** -- cgit v1.2.3 From fb0e0ffe7fc8e0e91481e67665f1d646bfd071f2 Mon Sep 17 00:00:00 2001 From: Tony Fischetti Date: Sun, 16 Feb 2020 19:08:26 -0500 Subject: Documentation: bring process docs up to date The guide to the kernel dev process documentation, for example, contains references to older kernels and their timelines. In addition, one of the "long term support kernels" listed have since reached EOL, and a new one has been named. This patch brings information/tables up to date. Additionally, some very trivial grammatical errors, unclear sentences, and potentially unsavory diction have been edited. Signed-off-by: Tony Fischetti Reviewed-by: Randy Dunlap Signed-off-by: Jonathan Corbet --- Documentation/process/2.Process.rst | 108 +++++++++++++++++---------------- Documentation/process/coding-style.rst | 18 +++--- Documentation/process/howto.rst | 17 +++--- 3 files changed, 73 insertions(+), 70 deletions(-) (limited to 'Documentation') diff --git a/Documentation/process/2.Process.rst b/Documentation/process/2.Process.rst index ae020d84d7c4..b21b5b245d13 100644 --- a/Documentation/process/2.Process.rst +++ b/Documentation/process/2.Process.rst @@ -18,18 +18,18 @@ major kernel release happening every two or three months. The recent release history looks like this: ====== ================= - 4.11 April 30, 2017 - 4.12 July 2, 2017 - 4.13 September 3, 2017 - 4.14 November 12, 2017 - 4.15 January 28, 2018 - 4.16 April 1, 2018 + 5.0 March 3, 2019 + 5.1 May 5, 2019 + 5.2 July 7, 2019 + 5.3 September 15, 2019 + 5.4 November 24, 2019 + 5.5 January 6, 2020 ====== ================= -Every 4.x release is a major kernel release with new features, internal -API changes, and more. A typical 4.x release contain about 13,000 -changesets with changes to several hundred thousand lines of code. 4.x is -thus the leading edge of Linux kernel development; the kernel uses a +Every 5.x release is a major kernel release with new features, internal +API changes, and more. A typical release can contain about 13,000 +changesets with changes to several hundred thousand lines of code. 5.x is +the leading edge of Linux kernel development; the kernel uses a rolling development model which is continually integrating major changes. A relatively straightforward discipline is followed with regard to the @@ -48,9 +48,9 @@ detail later on). The merge window lasts for approximately two weeks. At the end of this time, Linus Torvalds will declare that the window is closed and release the -first of the "rc" kernels. For the kernel which is destined to be 2.6.40, +first of the "rc" kernels. For the kernel which is destined to be 5.6, for example, the release which happens at the end of the merge window will -be called 2.6.40-rc1. The -rc1 release is the signal that the time to +be called 5.6-rc1. The -rc1 release is the signal that the time to merge new features has passed, and that the time to stabilize the next kernel has begun. @@ -67,22 +67,23 @@ add at any time). As fixes make their way into the mainline, the patch rate will slow over time. Linus releases new -rc kernels about once a week; a normal series will get up to somewhere between -rc6 and -rc9 before the kernel is -considered to be sufficiently stable and the final 2.6.x release is made. +considered to be sufficiently stable and the final release is made. At that point the whole process starts over again. -As an example, here is how the 4.16 development cycle went (all dates in -2018): +As an example, here is how the 5.4 development cycle went (all dates in +2019): ============== =============================== - January 28 4.15 stable release - February 11 4.16-rc1, merge window closes - February 18 4.16-rc2 - February 25 4.16-rc3 - March 4 4.16-rc4 - March 11 4.16-rc5 - March 18 4.16-rc6 - March 25 4.16-rc7 - April 1 4.16 stable release + September 15 5.3 stable release + September 30 5.4-rc1, merge window closes + October 6 5.4-rc2 + October 13 5.4-rc3 + October 20 5.4-rc4 + October 27 5.4-rc5 + November 3 5.4-rc6 + November 10 5.4-rc7 + November 17 5.4-rc8 + November 24 5.4 stable release ============== =============================== How do the developers decide when to close the development cycle and create @@ -98,43 +99,44 @@ release is made. In the real world, this kind of perfection is hard to achieve; there are just too many variables in a project of this size. There comes a point where delaying the final release just makes the problem worse; the pile of changes waiting for the next merge window will grow -larger, creating even more regressions the next time around. So most 4.x +larger, creating even more regressions the next time around. So most 5.x kernels go out with a handful of known regressions though, hopefully, none of them are serious. Once a stable release is made, its ongoing maintenance is passed off to the -"stable team," currently consisting of Greg Kroah-Hartman. The stable team -will release occasional updates to the stable release using the 4.x.y -numbering scheme. To be considered for an update release, a patch must (1) -fix a significant bug, and (2) already be merged into the mainline for the -next development kernel. Kernels will typically receive stable updates for -a little more than one development cycle past their initial release. So, -for example, the 4.13 kernel's history looked like: +"stable team," currently Greg Kroah-Hartman. The stable team will release +occasional updates to the stable release using the 5.x.y numbering scheme. +To be considered for an update release, a patch must (1) fix a significant +bug, and (2) already be merged into the mainline for the next development +kernel. Kernels will typically receive stable updates for a little more +than one development cycle past their initial release. So, for example, the +5.2 kernel's history looked like this (all dates in 2019): ============== =============================== - September 3 4.13 stable release - September 13 4.13.1 - September 20 4.13.2 - September 27 4.13.3 - October 5 4.13.4 - October 12 4.13.5 + September 15 5.2 stable release + July 14 5.2.1 + July 21 5.2.2 + July 26 5.2.3 + July 28 5.2.4 + July 31 5.2.5 ... ... - November 24 4.13.16 + October 11 5.2.21 ============== =============================== -4.13.16 was the final stable update of the 4.13 release. +5.2.21 was the final stable update of the 5.2 release. Some kernels are designated "long term" kernels; they will receive support for a longer period. As of this writing, the current long term kernels and their maintainers are: - ====== ====================== ============================== - 3.16 Ben Hutchings (very long-term stable kernel) - 4.1 Sasha Levin - 4.4 Greg Kroah-Hartman (very long-term stable kernel) - 4.9 Greg Kroah-Hartman - 4.14 Greg Kroah-Hartman - ====== ====================== ============================== + ====== ================================ ======================= + 3.16 Ben Hutchings (very long-term kernel) + 4.4 Greg Kroah-Hartman & Sasha Levin (very long-term kernel) + 4.9 Greg Kroah-Hartman & Sasha Levin + 4.14 Greg Kroah-Hartman & Sasha Levin + 4.19 Greg Kroah-Hartman & Sasha Levin + 5.4 Greg Kroah-Hartman & Sasha Levin + ====== ================================ ======================= The selection of a kernel for long-term support is purely a matter of a maintainer having the need and the time to maintain that release. There @@ -215,12 +217,12 @@ How patches get into the Kernel ------------------------------- There is exactly one person who can merge patches into the mainline kernel -repository: Linus Torvalds. But, of the over 9,500 patches which went -into the 2.6.38 kernel, only 112 (around 1.3%) were directly chosen by Linus -himself. The kernel project has long since grown to a size where no single -developer could possibly inspect and select every patch unassisted. The -way the kernel developers have addressed this growth is through the use of -a lieutenant system built around a chain of trust. +repository: Linus Torvalds. But, for example, of the over 9,500 patches +which went into the 2.6.38 kernel, only 112 (around 1.3%) were directly +chosen by Linus himself. The kernel project has long since grown to a size +where no single developer could possibly inspect and select every patch +unassisted. The way the kernel developers have addressed this growth is +through the use of a lieutenant system built around a chain of trust. The kernel code base is logically broken down into a set of subsystems: networking, specific architecture support, memory management, video diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst index edb296c52f61..acb2f1b36350 100644 --- a/Documentation/process/coding-style.rst +++ b/Documentation/process/coding-style.rst @@ -284,9 +284,9 @@ context lines. 4) Naming --------- -C is a Spartan language, and so should your naming be. Unlike Modula-2 -and Pascal programmers, C programmers do not use cute names like -ThisVariableIsATemporaryCounter. A C programmer would call that +C is a Spartan language, and your naming conventions should follow suit. +Unlike Modula-2 and Pascal programmers, C programmers do not use cute +names like ThisVariableIsATemporaryCounter. A C programmer would call that variable ``tmp``, which is much easier to write, and not the least more difficult to understand. @@ -300,9 +300,9 @@ that counts the number of active users, you should call that ``count_active_users()`` or similar, you should **not** call it ``cntusr()``. Encoding the type of a function into the name (so-called Hungarian -notation) is brain damaged - the compiler knows the types anyway and can -check those, and it only confuses the programmer. No wonder MicroSoft -makes buggy programs. +notation) is asinine - the compiler knows the types anyway and can check +those, and it only confuses the programmer. No wonder Microsoft makes buggy +programs. LOCAL variable names should be short, and to the point. If you have some random integer loop counter, it should probably be called ``i``. @@ -806,9 +806,9 @@ covers RTL which is used frequently with assembly language in the kernel. ---------------------------- Kernel developers like to be seen as literate. Do mind the spelling -of kernel messages to make a good impression. Do not use crippled -words like ``dont``; use ``do not`` or ``don't`` instead. Make the messages -concise, clear, and unambiguous. +of kernel messages to make a good impression. Do not use incorrect +contractions like ``dont``; use ``do not`` or ``don't`` instead. Make the +messages concise, clear, and unambiguous. Kernel messages do not have to be terminated with a period. diff --git a/Documentation/process/howto.rst b/Documentation/process/howto.rst index b6f5a379ad6c..70791e153de1 100644 --- a/Documentation/process/howto.rst +++ b/Documentation/process/howto.rst @@ -243,10 +243,10 @@ branches. These different branches are: Mainline tree ~~~~~~~~~~~~~ -Mainline tree are maintained by Linus Torvalds, and can be found at +The mainline tree is maintained by Linus Torvalds, and can be found at https://kernel.org or in the repo. Its development process is as follows: - - As soon as a new kernel is released a two weeks window is open, + - As soon as a new kernel is released a two week window is open, during this period of time maintainers can submit big diffs to Linus, usually the patches that have already been included in the linux-next for a few weeks. The preferred way to submit big changes @@ -281,8 +281,9 @@ Various stable trees with multiple major numbers Kernels with 3-part versions are -stable kernels. They contain relatively small and critical fixes for security problems or significant -regressions discovered in a given major mainline release, with the first -2-part of version number are the same correspondingly. +regressions discovered in a given major mainline release. Each release +in a major stable series increments the third part of the version +number, keeping the first two parts the same. This is the recommended branch for users who want the most recent stable kernel and are not interested in helping test development/experimental @@ -359,10 +360,10 @@ Managing bug reports One of the best ways to put into practice your hacking skills is by fixing bugs reported by other people. Not only you will help to make the kernel -more stable, you'll learn to fix real world problems and you will improve -your skills, and other developers will be aware of your presence. Fixing -bugs is one of the best ways to get merits among other developers, because -not many people like wasting time fixing other people's bugs. +more stable, but you'll also learn to fix real world problems and you will +improve your skills, and other developers will be aware of your presence. +Fixing bugs is one of the best ways to get merits among other developers, +because not many people like wasting time fixing other people's bugs. To work in the already reported bug reports, go to https://bugzilla.kernel.org. -- cgit v1.2.3 From 965fc39f73932041441e03730db31516e285b61a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 15 Feb 2020 23:26:06 -0800 Subject: Documentation: sort _SPHINXDIRS for 'make help' Sort the _SPHINXDIRS so that the 'make help' output is easier to read & search and in a predictable order instead of some unknown pseudo-random order. Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Signed-off-by: Jonathan Corbet --- Documentation/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/Makefile b/Documentation/Makefile index d77bb607aea4..79ecee62d597 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -13,7 +13,7 @@ endif SPHINXBUILD = sphinx-build SPHINXOPTS = SPHINXDIRS = . -_SPHINXDIRS = $(patsubst $(srctree)/Documentation/%/index.rst,%,$(wildcard $(srctree)/Documentation/*/index.rst)) +_SPHINXDIRS = $(sort $(patsubst $(srctree)/Documentation/%/index.rst,%,$(wildcard $(srctree)/Documentation/*/index.rst))) SPHINX_CONF = conf.py PAPER = BUILDDIR = $(obj)/output -- cgit v1.2.3 From 1733ec77d34059cd67a7b9677fe2fd3ef977afb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 14 Feb 2020 18:41:32 +0100 Subject: docs: driver-api: edid: Fix list formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without the empty lines, Sphinx renders the list as part of the running text. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Jonathan Corbet --- Documentation/driver-api/edid.rst | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/driver-api/edid.rst b/Documentation/driver-api/edid.rst index b1b5acd501ed..7dc07942ceb2 100644 --- a/Documentation/driver-api/edid.rst +++ b/Documentation/driver-api/edid.rst @@ -11,11 +11,13 @@ Today, with the advent of Kernel Mode Setting, a graphics board is either correctly working because all components follow the standards - or the computer is unusable, because the screen remains dark after booting or it displays the wrong area. Cases when this happens are: + - The graphics board does not recognize the monitor. - The graphics board is unable to detect any EDID data. - The graphics board incorrectly forwards EDID data to the driver. - The monitor sends no or bogus EDID data. - A KVM sends its own EDID data instead of querying the connected monitor. + Adding the kernel parameter "nomodeset" helps in most cases, but causes restrictions later on. -- cgit v1.2.3 From 320bfd91a985f2b945bad611c43add8a3a359845 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 14 Feb 2020 18:41:33 +0100 Subject: docs: admin-guide: Move edid.rst from driver-api MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This document describes actions that an admin can do, rather than interfaces available to driver developers, so admin-guide seems to be a more appropriate place for it. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/edid.rst | 60 +++++++++++++++++++++++++++++++++++++ Documentation/admin-guide/index.rst | 1 + Documentation/driver-api/edid.rst | 60 ------------------------------------- Documentation/driver-api/index.rst | 1 - 4 files changed, 61 insertions(+), 61 deletions(-) create mode 100644 Documentation/admin-guide/edid.rst delete mode 100644 Documentation/driver-api/edid.rst (limited to 'Documentation') diff --git a/Documentation/admin-guide/edid.rst b/Documentation/admin-guide/edid.rst new file mode 100644 index 000000000000..7dc07942ceb2 --- /dev/null +++ b/Documentation/admin-guide/edid.rst @@ -0,0 +1,60 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==== +EDID +==== + +In the good old days when graphics parameters were configured explicitly +in a file called xorg.conf, even broken hardware could be managed. + +Today, with the advent of Kernel Mode Setting, a graphics board is +either correctly working because all components follow the standards - +or the computer is unusable, because the screen remains dark after +booting or it displays the wrong area. Cases when this happens are: + +- The graphics board does not recognize the monitor. +- The graphics board is unable to detect any EDID data. +- The graphics board incorrectly forwards EDID data to the driver. +- The monitor sends no or bogus EDID data. +- A KVM sends its own EDID data instead of querying the connected monitor. + +Adding the kernel parameter "nomodeset" helps in most cases, but causes +restrictions later on. + +As a remedy for such situations, the kernel configuration item +CONFIG_DRM_LOAD_EDID_FIRMWARE was introduced. It allows to provide an +individually prepared or corrected EDID data set in the /lib/firmware +directory from where it is loaded via the firmware interface. The code +(see drivers/gpu/drm/drm_edid_load.c) contains built-in data sets for +commonly used screen resolutions (800x600, 1024x768, 1280x1024, 1600x1200, +1680x1050, 1920x1080) as binary blobs, but the kernel source tree does +not contain code to create these data. In order to elucidate the origin +of the built-in binary EDID blobs and to facilitate the creation of +individual data for a specific misbehaving monitor, commented sources +and a Makefile environment are given here. + +To create binary EDID and C source code files from the existing data +material, simply type "make". + +If you want to create your own EDID file, copy the file 1024x768.S, +replace the settings with your own data and add a new target to the +Makefile. Please note that the EDID data structure expects the timing +values in a different way as compared to the standard X11 format. + +X11: + HTimings: + hdisp hsyncstart hsyncend htotal + VTimings: + vdisp vsyncstart vsyncend vtotal + +EDID:: + + #define XPIX hdisp + #define XBLANK htotal-hdisp + #define XOFFSET hsyncstart-hdisp + #define XPULSE hsyncend-hsyncstart + + #define YPIX vdisp + #define YBLANK vtotal-vdisp + #define YOFFSET vsyncstart-vdisp + #define YPULSE vsyncend-vsyncstart diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index f1d0ccffbe72..5a6269fb8593 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -75,6 +75,7 @@ configure specific aspects of kernel behavior to your liking. cputopology dell_rbu device-mapper/index + edid efi-stub ext4 nfs/index diff --git a/Documentation/driver-api/edid.rst b/Documentation/driver-api/edid.rst deleted file mode 100644 index 7dc07942ceb2..000000000000 --- a/Documentation/driver-api/edid.rst +++ /dev/null @@ -1,60 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -==== -EDID -==== - -In the good old days when graphics parameters were configured explicitly -in a file called xorg.conf, even broken hardware could be managed. - -Today, with the advent of Kernel Mode Setting, a graphics board is -either correctly working because all components follow the standards - -or the computer is unusable, because the screen remains dark after -booting or it displays the wrong area. Cases when this happens are: - -- The graphics board does not recognize the monitor. -- The graphics board is unable to detect any EDID data. -- The graphics board incorrectly forwards EDID data to the driver. -- The monitor sends no or bogus EDID data. -- A KVM sends its own EDID data instead of querying the connected monitor. - -Adding the kernel parameter "nomodeset" helps in most cases, but causes -restrictions later on. - -As a remedy for such situations, the kernel configuration item -CONFIG_DRM_LOAD_EDID_FIRMWARE was introduced. It allows to provide an -individually prepared or corrected EDID data set in the /lib/firmware -directory from where it is loaded via the firmware interface. The code -(see drivers/gpu/drm/drm_edid_load.c) contains built-in data sets for -commonly used screen resolutions (800x600, 1024x768, 1280x1024, 1600x1200, -1680x1050, 1920x1080) as binary blobs, but the kernel source tree does -not contain code to create these data. In order to elucidate the origin -of the built-in binary EDID blobs and to facilitate the creation of -individual data for a specific misbehaving monitor, commented sources -and a Makefile environment are given here. - -To create binary EDID and C source code files from the existing data -material, simply type "make". - -If you want to create your own EDID file, copy the file 1024x768.S, -replace the settings with your own data and add a new target to the -Makefile. Please note that the EDID data structure expects the timing -values in a different way as compared to the standard X11 format. - -X11: - HTimings: - hdisp hsyncstart hsyncend htotal - VTimings: - vdisp vsyncstart vsyncend vtotal - -EDID:: - - #define XPIX hdisp - #define XBLANK htotal-hdisp - #define XOFFSET hsyncstart-hdisp - #define XPULSE hsyncend-hsyncstart - - #define YPIX vdisp - #define YBLANK vtotal-vdisp - #define YOFFSET vsyncstart-vdisp - #define YPULSE vsyncend-vsyncstart diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst index 0ebe205efd0c..ea3003b3c5e5 100644 --- a/Documentation/driver-api/index.rst +++ b/Documentation/driver-api/index.rst @@ -74,7 +74,6 @@ available subsections can be seen below. connector console dcdbas - edid eisa ipmb isa -- cgit v1.2.3 From b4ce545f349b711351ec4b0df7a3302d91c3dd45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 14 Feb 2020 18:41:35 +0100 Subject: docs: admin-guide: edid: Clarify where to run "make" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When both the documentation and the data files lived in Documentation/EDID, this wasn't necessary, but both have been moved to other directories in the meantime. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/edid.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/edid.rst b/Documentation/admin-guide/edid.rst index 7dc07942ceb2..80deeb21a265 100644 --- a/Documentation/admin-guide/edid.rst +++ b/Documentation/admin-guide/edid.rst @@ -34,7 +34,7 @@ individual data for a specific misbehaving monitor, commented sources and a Makefile environment are given here. To create binary EDID and C source code files from the existing data -material, simply type "make". +material, simply type "make" in tools/edid/. If you want to create your own EDID file, copy the file 1024x768.S, replace the settings with your own data and add a new target to the -- cgit v1.2.3 From e2c79ab7d75b4c6ed827e8078e5ebe2d059edafc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 14 Feb 2020 18:41:34 +0100 Subject: tools/edid: Move EDID data sets from Documentation/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EDID files are not really documentation. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Jonathan Corbet --- Documentation/EDID/1024x768.S | 43 ------- Documentation/EDID/1280x1024.S | 43 ------- Documentation/EDID/1600x1200.S | 43 ------- Documentation/EDID/1680x1050.S | 43 ------- Documentation/EDID/1920x1080.S | 43 ------- Documentation/EDID/800x600.S | 40 ------ Documentation/EDID/Makefile | 37 ------ Documentation/EDID/edid.S | 274 ----------------------------------------- Documentation/EDID/hex | 1 - 9 files changed, 567 deletions(-) delete mode 100644 Documentation/EDID/1024x768.S delete mode 100644 Documentation/EDID/1280x1024.S delete mode 100644 Documentation/EDID/1600x1200.S delete mode 100644 Documentation/EDID/1680x1050.S delete mode 100644 Documentation/EDID/1920x1080.S delete mode 100644 Documentation/EDID/800x600.S delete mode 100644 Documentation/EDID/Makefile delete mode 100644 Documentation/EDID/edid.S delete mode 100644 Documentation/EDID/hex (limited to 'Documentation') diff --git a/Documentation/EDID/1024x768.S b/Documentation/EDID/1024x768.S deleted file mode 100644 index 4aed3f9ab88a..000000000000 --- a/Documentation/EDID/1024x768.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1024x768.S: EDID data set for standard 1024x768 60 Hz monitor - - Copyright (C) 2011 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 65000 /* kHz */ -#define XPIX 1024 -#define YPIX 768 -#define XY_RATIO XY_RATIO_4_3 -#define XBLANK 320 -#define YBLANK 38 -#define XOFFSET 8 -#define XPULSE 144 -#define YOFFSET 3 -#define YPULSE 6 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux XGA" -#define ESTABLISHED_TIMING2_BITS 0x08 /* Bit 3 -> 1024x768 @60 Hz */ -#define HSYNC_POL 0 -#define VSYNC_POL 0 - -#include "edid.S" diff --git a/Documentation/EDID/1280x1024.S b/Documentation/EDID/1280x1024.S deleted file mode 100644 index b26dd424cad7..000000000000 --- a/Documentation/EDID/1280x1024.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1280x1024.S: EDID data set for standard 1280x1024 60 Hz monitor - - Copyright (C) 2011 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 108000 /* kHz */ -#define XPIX 1280 -#define YPIX 1024 -#define XY_RATIO XY_RATIO_5_4 -#define XBLANK 408 -#define YBLANK 42 -#define XOFFSET 48 -#define XPULSE 112 -#define YOFFSET 1 -#define YPULSE 3 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux SXGA" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/1600x1200.S b/Documentation/EDID/1600x1200.S deleted file mode 100644 index 0d091b282768..000000000000 --- a/Documentation/EDID/1600x1200.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1600x1200.S: EDID data set for standard 1600x1200 60 Hz monitor - - Copyright (C) 2013 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 162000 /* kHz */ -#define XPIX 1600 -#define YPIX 1200 -#define XY_RATIO XY_RATIO_4_3 -#define XBLANK 560 -#define YBLANK 50 -#define XOFFSET 64 -#define XPULSE 192 -#define YOFFSET 1 -#define YPULSE 3 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux UXGA" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/1680x1050.S b/Documentation/EDID/1680x1050.S deleted file mode 100644 index 7dfed9a33eab..000000000000 --- a/Documentation/EDID/1680x1050.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1680x1050.S: EDID data set for standard 1680x1050 60 Hz monitor - - Copyright (C) 2012 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 146250 /* kHz */ -#define XPIX 1680 -#define YPIX 1050 -#define XY_RATIO XY_RATIO_16_10 -#define XBLANK 560 -#define YBLANK 39 -#define XOFFSET 104 -#define XPULSE 176 -#define YOFFSET 3 -#define YPULSE 6 -#define DPI 96 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux WSXGA" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/1920x1080.S b/Documentation/EDID/1920x1080.S deleted file mode 100644 index d6ffbba28e95..000000000000 --- a/Documentation/EDID/1920x1080.S +++ /dev/null @@ -1,43 +0,0 @@ -/* - 1920x1080.S: EDID data set for standard 1920x1080 60 Hz monitor - - Copyright (C) 2012 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 148500 /* kHz */ -#define XPIX 1920 -#define YPIX 1080 -#define XY_RATIO XY_RATIO_16_9 -#define XBLANK 280 -#define YBLANK 45 -#define XOFFSET 88 -#define XPULSE 44 -#define YOFFSET 4 -#define YPULSE 5 -#define DPI 96 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux FHD" -/* No ESTABLISHED_TIMINGx_BITS */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/800x600.S b/Documentation/EDID/800x600.S deleted file mode 100644 index a5616588de08..000000000000 --- a/Documentation/EDID/800x600.S +++ /dev/null @@ -1,40 +0,0 @@ -/* - 800x600.S: EDID data set for standard 800x600 60 Hz monitor - - Copyright (C) 2011 Carsten Emde - Copyright (C) 2014 Linaro Limited - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. -*/ - -/* EDID */ -#define VERSION 1 -#define REVISION 3 - -/* Display */ -#define CLOCK 40000 /* kHz */ -#define XPIX 800 -#define YPIX 600 -#define XY_RATIO XY_RATIO_4_3 -#define XBLANK 256 -#define YBLANK 28 -#define XOFFSET 40 -#define XPULSE 128 -#define YOFFSET 1 -#define YPULSE 4 -#define DPI 72 -#define VFREQ 60 /* Hz */ -#define TIMING_NAME "Linux SVGA" -#define ESTABLISHED_TIMING1_BITS 0x01 /* Bit 0: 800x600 @ 60Hz */ -#define HSYNC_POL 1 -#define VSYNC_POL 1 - -#include "edid.S" diff --git a/Documentation/EDID/Makefile b/Documentation/EDID/Makefile deleted file mode 100644 index 85a927dfab02..000000000000 --- a/Documentation/EDID/Makefile +++ /dev/null @@ -1,37 +0,0 @@ - -SOURCES := $(wildcard [0-9]*x[0-9]*.S) - -BIN := $(patsubst %.S, %.bin, $(SOURCES)) - -IHEX := $(patsubst %.S, %.bin.ihex, $(SOURCES)) - -CODE := $(patsubst %.S, %.c, $(SOURCES)) - -all: $(BIN) $(IHEX) $(CODE) - -clean: - @rm -f *.o *.bin.ihex *.bin *.c - -%.o: %.S - @cc -c $^ - -%.bin.nocrc: %.o - @objcopy -Obinary $^ $@ - -%.crc: %.bin.nocrc - @list=$$(for i in `seq 1 127`; do head -c$$i $^ | tail -c1 \ - | hexdump -v -e '/1 "%02X+"'; done); \ - echo "ibase=16;100-($${list%?})%100" | bc >$@ - -%.p: %.crc %.S - @cc -c -DCRC="$$(cat $*.crc)" -o $@ $*.S - -%.bin: %.p - @objcopy -Obinary $^ $@ - -%.bin.ihex: %.p - @objcopy -Oihex $^ $@ - @dos2unix $@ 2>/dev/null - -%.c: %.bin - @echo "{" >$@; hexdump -f hex $^ >>$@; echo "};" >>$@ diff --git a/Documentation/EDID/edid.S b/Documentation/EDID/edid.S deleted file mode 100644 index c3d13815526d..000000000000 --- a/Documentation/EDID/edid.S +++ /dev/null @@ -1,274 +0,0 @@ -/* - edid.S: EDID data template - - Copyright (C) 2012 Carsten Emde - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - - -/* Manufacturer */ -#define MFG_LNX1 'L' -#define MFG_LNX2 'N' -#define MFG_LNX3 'X' -#define SERIAL 0 -#define YEAR 2012 -#define WEEK 5 - -/* EDID 1.3 standard definitions */ -#define XY_RATIO_16_10 0b00 -#define XY_RATIO_4_3 0b01 -#define XY_RATIO_5_4 0b10 -#define XY_RATIO_16_9 0b11 - -/* Provide defaults for the timing bits */ -#ifndef ESTABLISHED_TIMING1_BITS -#define ESTABLISHED_TIMING1_BITS 0x00 -#endif -#ifndef ESTABLISHED_TIMING2_BITS -#define ESTABLISHED_TIMING2_BITS 0x00 -#endif -#ifndef ESTABLISHED_TIMING3_BITS -#define ESTABLISHED_TIMING3_BITS 0x00 -#endif - -#define mfgname2id(v1,v2,v3) \ - ((((v1-'@')&0x1f)<<10)+(((v2-'@')&0x1f)<<5)+((v3-'@')&0x1f)) -#define swap16(v1) ((v1>>8)+((v1&0xff)<<8)) -#define lsbs2(v1,v2) (((v1&0x0f)<<4)+(v2&0x0f)) -#define msbs2(v1,v2) ((((v1>>8)&0x0f)<<4)+((v2>>8)&0x0f)) -#define msbs4(v1,v2,v3,v4) \ - ((((v1>>8)&0x03)<<6)+(((v2>>8)&0x03)<<4)+\ - (((v3>>4)&0x03)<<2)+((v4>>4)&0x03)) -#define pixdpi2mm(pix,dpi) ((pix*25)/dpi) -#define xsize pixdpi2mm(XPIX,DPI) -#define ysize pixdpi2mm(YPIX,DPI) - - .data - -/* Fixed header pattern */ -header: .byte 0x00,0xff,0xff,0xff,0xff,0xff,0xff,0x00 - -mfg_id: .hword swap16(mfgname2id(MFG_LNX1, MFG_LNX2, MFG_LNX3)) - -prod_code: .hword 0 - -/* Serial number. 32 bits, little endian. */ -serial_number: .long SERIAL - -/* Week of manufacture */ -week: .byte WEEK - -/* Year of manufacture, less 1990. (1990-2245) - If week=255, it is the model year instead */ -year: .byte YEAR-1990 - -version: .byte VERSION /* EDID version, usually 1 (for 1.3) */ -revision: .byte REVISION /* EDID revision, usually 3 (for 1.3) */ - -/* If Bit 7=1 Digital input. If set, the following bit definitions apply: - Bits 6-1 Reserved, must be 0 - Bit 0 Signal is compatible with VESA DFP 1.x TMDS CRGB, - 1 pixel per clock, up to 8 bits per color, MSB aligned, - If Bit 7=0 Analog input. If clear, the following bit definitions apply: - Bits 6-5 Video white and sync levels, relative to blank - 00=+0.7/-0.3 V; 01=+0.714/-0.286 V; - 10=+1.0/-0.4 V; 11=+0.7/0 V - Bit 4 Blank-to-black setup (pedestal) expected - Bit 3 Separate sync supported - Bit 2 Composite sync (on HSync) supported - Bit 1 Sync on green supported - Bit 0 VSync pulse must be serrated when somposite or - sync-on-green is used. */ -video_parms: .byte 0x6d - -/* Maximum horizontal image size, in centimetres - (max 292 cm/115 in at 16:9 aspect ratio) */ -max_hor_size: .byte xsize/10 - -/* Maximum vertical image size, in centimetres. - If either byte is 0, undefined (e.g. projector) */ -max_vert_size: .byte ysize/10 - -/* Display gamma, minus 1, times 100 (range 1.00-3.5 */ -gamma: .byte 120 - -/* Bit 7 DPMS standby supported - Bit 6 DPMS suspend supported - Bit 5 DPMS active-off supported - Bits 4-3 Display type: 00=monochrome; 01=RGB colour; - 10=non-RGB multicolour; 11=undefined - Bit 2 Standard sRGB colour space. Bytes 25-34 must contain - sRGB standard values. - Bit 1 Preferred timing mode specified in descriptor block 1. - Bit 0 GTF supported with default parameter values. */ -dsp_features: .byte 0xea - -/* Chromaticity coordinates. */ -/* Red and green least-significant bits - Bits 7-6 Red x value least-significant 2 bits - Bits 5-4 Red y value least-significant 2 bits - Bits 3-2 Green x value lst-significant 2 bits - Bits 1-0 Green y value least-significant 2 bits */ -red_green_lsb: .byte 0x5e - -/* Blue and white least-significant 2 bits */ -blue_white_lsb: .byte 0xc0 - -/* Red x value most significant 8 bits. - 0-255 encodes 0-0.996 (255/256); 0-0.999 (1023/1024) with lsbits */ -red_x_msb: .byte 0xa4 - -/* Red y value most significant 8 bits */ -red_y_msb: .byte 0x59 - -/* Green x and y value most significant 8 bits */ -green_x_y_msb: .byte 0x4a,0x98 - -/* Blue x and y value most significant 8 bits */ -blue_x_y_msb: .byte 0x25,0x20 - -/* Default white point x and y value most significant 8 bits */ -white_x_y_msb: .byte 0x50,0x54 - -/* Established timings */ -/* Bit 7 720x400 @ 70 Hz - Bit 6 720x400 @ 88 Hz - Bit 5 640x480 @ 60 Hz - Bit 4 640x480 @ 67 Hz - Bit 3 640x480 @ 72 Hz - Bit 2 640x480 @ 75 Hz - Bit 1 800x600 @ 56 Hz - Bit 0 800x600 @ 60 Hz */ -estbl_timing1: .byte ESTABLISHED_TIMING1_BITS - -/* Bit 7 800x600 @ 72 Hz - Bit 6 800x600 @ 75 Hz - Bit 5 832x624 @ 75 Hz - Bit 4 1024x768 @ 87 Hz, interlaced (1024x768) - Bit 3 1024x768 @ 60 Hz - Bit 2 1024x768 @ 72 Hz - Bit 1 1024x768 @ 75 Hz - Bit 0 1280x1024 @ 75 Hz */ -estbl_timing2: .byte ESTABLISHED_TIMING2_BITS - -/* Bit 7 1152x870 @ 75 Hz (Apple Macintosh II) - Bits 6-0 Other manufacturer-specific display mod */ -estbl_timing3: .byte ESTABLISHED_TIMING3_BITS - -/* Standard timing */ -/* X resolution, less 31, divided by 8 (256-2288 pixels) */ -std_xres: .byte (XPIX/8)-31 -/* Y resolution, X:Y pixel ratio - Bits 7-6 X:Y pixel ratio: 00=16:10; 01=4:3; 10=5:4; 11=16:9. - Bits 5-0 Vertical frequency, less 60 (60-123 Hz) */ -std_vres: .byte (XY_RATIO<<6)+VFREQ-60 - .fill 7,2,0x0101 /* Unused */ - -descriptor1: -/* Pixel clock in 10 kHz units. (0.-655.35 MHz, little-endian) */ -clock: .hword CLOCK/10 - -/* Horizontal active pixels 8 lsbits (0-4095) */ -x_act_lsb: .byte XPIX&0xff -/* Horizontal blanking pixels 8 lsbits (0-4095) - End of active to start of next active. */ -x_blk_lsb: .byte XBLANK&0xff -/* Bits 7-4 Horizontal active pixels 4 msbits - Bits 3-0 Horizontal blanking pixels 4 msbits */ -x_msbs: .byte msbs2(XPIX,XBLANK) - -/* Vertical active lines 8 lsbits (0-4095) */ -y_act_lsb: .byte YPIX&0xff -/* Vertical blanking lines 8 lsbits (0-4095) */ -y_blk_lsb: .byte YBLANK&0xff -/* Bits 7-4 Vertical active lines 4 msbits - Bits 3-0 Vertical blanking lines 4 msbits */ -y_msbs: .byte msbs2(YPIX,YBLANK) - -/* Horizontal sync offset pixels 8 lsbits (0-1023) From blanking start */ -x_snc_off_lsb: .byte XOFFSET&0xff -/* Horizontal sync pulse width pixels 8 lsbits (0-1023) */ -x_snc_pls_lsb: .byte XPULSE&0xff -/* Bits 7-4 Vertical sync offset lines 4 lsbits (0-63) - Bits 3-0 Vertical sync pulse width lines 4 lsbits (0-63) */ -y_snc_lsb: .byte lsbs2(YOFFSET, YPULSE) -/* Bits 7-6 Horizontal sync offset pixels 2 msbits - Bits 5-4 Horizontal sync pulse width pixels 2 msbits - Bits 3-2 Vertical sync offset lines 2 msbits - Bits 1-0 Vertical sync pulse width lines 2 msbits */ -xy_snc_msbs: .byte msbs4(XOFFSET,XPULSE,YOFFSET,YPULSE) - -/* Horizontal display size, mm, 8 lsbits (0-4095 mm, 161 in) */ -x_dsp_size: .byte xsize&0xff - -/* Vertical display size, mm, 8 lsbits (0-4095 mm, 161 in) */ -y_dsp_size: .byte ysize&0xff - -/* Bits 7-4 Horizontal display size, mm, 4 msbits - Bits 3-0 Vertical display size, mm, 4 msbits */ -dsp_size_mbsb: .byte msbs2(xsize,ysize) - -/* Horizontal border pixels (each side; total is twice this) */ -x_border: .byte 0 -/* Vertical border lines (each side; total is twice this) */ -y_border: .byte 0 - -/* Bit 7 Interlaced - Bits 6-5 Stereo mode: 00=No stereo; other values depend on bit 0: - Bit 0=0: 01=Field sequential, sync=1 during right; 10=similar, - sync=1 during left; 11=4-way interleaved stereo - Bit 0=1 2-way interleaved stereo: 01=Right image on even lines; - 10=Left image on even lines; 11=side-by-side - Bits 4-3 Sync type: 00=Analog composite; 01=Bipolar analog composite; - 10=Digital composite (on HSync); 11=Digital separate - Bit 2 If digital separate: Vertical sync polarity (1=positive) - Other types: VSync serrated (HSync during VSync) - Bit 1 If analog sync: Sync on all 3 RGB lines (else green only) - Digital: HSync polarity (1=positive) - Bit 0 2-way line-interleaved stereo, if bits 4-3 are not 00. */ -features: .byte 0x18+(VSYNC_POL<<2)+(HSYNC_POL<<1) - -descriptor2: .byte 0,0 /* Not a detailed timing descriptor */ - .byte 0 /* Must be zero */ - .byte 0xff /* Descriptor is monitor serial number (text) */ - .byte 0 /* Must be zero */ -start1: .ascii "Linux #0" -end1: .byte 0x0a /* End marker */ - .fill 12-(end1-start1), 1, 0x20 /* Padded spaces */ -descriptor3: .byte 0,0 /* Not a detailed timing descriptor */ - .byte 0 /* Must be zero */ - .byte 0xfd /* Descriptor is monitor range limits */ - .byte 0 /* Must be zero */ -start2: .byte VFREQ-1 /* Minimum vertical field rate (1-255 Hz) */ - .byte VFREQ+1 /* Maximum vertical field rate (1-255 Hz) */ - .byte (CLOCK/(XPIX+XBLANK))-1 /* Minimum horizontal line rate - (1-255 kHz) */ - .byte (CLOCK/(XPIX+XBLANK))+1 /* Maximum horizontal line rate - (1-255 kHz) */ - .byte (CLOCK/10000)+1 /* Maximum pixel clock rate, rounded up - to 10 MHz multiple (10-2550 MHz) */ - .byte 0 /* No extended timing information type */ -end2: .byte 0x0a /* End marker */ - .fill 12-(end2-start2), 1, 0x20 /* Padded spaces */ -descriptor4: .byte 0,0 /* Not a detailed timing descriptor */ - .byte 0 /* Must be zero */ - .byte 0xfc /* Descriptor is text */ - .byte 0 /* Must be zero */ -start3: .ascii TIMING_NAME -end3: .byte 0x0a /* End marker */ - .fill 12-(end3-start3), 1, 0x20 /* Padded spaces */ -extensions: .byte 0 /* Number of extensions to follow */ -checksum: .byte CRC /* Sum of all bytes must be 0 */ diff --git a/Documentation/EDID/hex b/Documentation/EDID/hex deleted file mode 100644 index 8873ebb618af..000000000000 --- a/Documentation/EDID/hex +++ /dev/null @@ -1 +0,0 @@ -"\t" 8/1 "0x%02x, " "\n" -- cgit v1.2.3 From 3f5f1f22ef10ee0278cef1243944c93aca01b236 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 11 Dec 2019 21:39:09 +0530 Subject: Documentation/ABI: Mark /sys/kernel/fadump_* sysfs files deprecated Add a deprecation note in FADump sysfs ABI documentation files and move them from ABI/testing to ABI/obsolete directory. Signed-off-by: Sourabh Jain [mpe: Use a proper table to fix errors from the documentation build] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191211160910.21656-6-sourabhjain@linux.ibm.com --- Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled | 9 +++++++++ Documentation/ABI/obsolete/sysfs-kernel-fadump_registered | 10 ++++++++++ Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem | 10 ++++++++++ Documentation/ABI/testing/sysfs-kernel-fadump_enabled | 7 ------- Documentation/ABI/testing/sysfs-kernel-fadump_registered | 8 -------- Documentation/ABI/testing/sysfs-kernel-fadump_release_mem | 8 -------- Documentation/powerpc/firmware-assisted-dump.rst | 12 ++++++++++++ 7 files changed, 41 insertions(+), 23 deletions(-) create mode 100644 Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled create mode 100644 Documentation/ABI/obsolete/sysfs-kernel-fadump_registered create mode 100644 Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem delete mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_enabled delete mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_registered delete mode 100644 Documentation/ABI/testing/sysfs-kernel-fadump_release_mem (limited to 'Documentation') diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled b/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled new file mode 100644 index 000000000000..e9c2de8b3688 --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_enabled @@ -0,0 +1,9 @@ +This ABI is renamed and moved to a new location /sys/kernel/fadump/enabled. + +What: /sys/kernel/fadump_enabled +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Primarily used to identify whether the FADump is enabled in + the kernel or not. +User: Kdump service diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered new file mode 100644 index 000000000000..0360be39c98e --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered @@ -0,0 +1,10 @@ +This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.¬ + +What: /sys/kernel/fadump_registered +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + Helps to control the dump collect feature from userspace. + Setting 1 to this file enables the system to collect the + dump and 0 to disable it. +User: Kdump service diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem new file mode 100644 index 000000000000..6ce0b129ab12 --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem @@ -0,0 +1,10 @@ +This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.¬ + +What: /sys/kernel/fadump_release_mem +Date: Feb 2012 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + This is a special sysfs file and only available when + the system is booted to capture the vmcore using FADump. + It is used to release the memory reserved by FADump to + save the crash dump. diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_enabled b/Documentation/ABI/testing/sysfs-kernel-fadump_enabled deleted file mode 100644 index f73632b1c006..000000000000 --- a/Documentation/ABI/testing/sysfs-kernel-fadump_enabled +++ /dev/null @@ -1,7 +0,0 @@ -What: /sys/kernel/fadump_enabled -Date: Feb 2012 -Contact: linuxppc-dev@lists.ozlabs.org -Description: read only - Primarily used to identify whether the FADump is enabled in - the kernel or not. -User: Kdump service diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_registered b/Documentation/ABI/testing/sysfs-kernel-fadump_registered deleted file mode 100644 index dcf925e53f0f..000000000000 --- a/Documentation/ABI/testing/sysfs-kernel-fadump_registered +++ /dev/null @@ -1,8 +0,0 @@ -What: /sys/kernel/fadump_registered -Date: Feb 2012 -Contact: linuxppc-dev@lists.ozlabs.org -Description: read/write - Helps to control the dump collect feature from userspace. - Setting 1 to this file enables the system to collect the - dump and 0 to disable it. -User: Kdump service diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump_release_mem b/Documentation/ABI/testing/sysfs-kernel-fadump_release_mem deleted file mode 100644 index 9c20d64ab48d..000000000000 --- a/Documentation/ABI/testing/sysfs-kernel-fadump_release_mem +++ /dev/null @@ -1,8 +0,0 @@ -What: /sys/kernel/fadump_release_mem -Date: Feb 2012 -Contact: linuxppc-dev@lists.ozlabs.org -Description: write only - This is a special sysfs file and only available when - the system is booted to capture the vmcore using FADump. - It is used to release the memory reserved by FADump to - save the crash dump. diff --git a/Documentation/powerpc/firmware-assisted-dump.rst b/Documentation/powerpc/firmware-assisted-dump.rst index 345a3405206e..2cd65a0df9b8 100644 --- a/Documentation/powerpc/firmware-assisted-dump.rst +++ b/Documentation/powerpc/firmware-assisted-dump.rst @@ -295,6 +295,18 @@ Note: /sys/kernel/fadump_release_opalcore sysfs has moved to echo 1 > /sys/firmware/opal/mpipl/release_core +Note: The following FADump sysfs files are deprecated. + ++----------------------------------+--------------------------------+ +| Deprecated | Alternative | ++----------------------------------+--------------------------------+ +| /sys/kernel/fadump_enabled | /sys/kernel/fadump/enabled | ++----------------------------------+--------------------------------+ +| /sys/kernel/fadump_registered | /sys/kernel/fadump/registered | ++----------------------------------+--------------------------------+ +| /sys/kernel/fadump_release_mem | /sys/kernel/fadump/release_mem | ++----------------------------------+--------------------------------+ + Here is the list of files under powerpc debugfs: (Assuming debugfs is mounted on /sys/kernel/debug directory.) -- cgit v1.2.3 From d8e73458f33a24810413ee3a0cd020b644de2f98 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Wed, 11 Dec 2019 21:39:10 +0530 Subject: powerpc/fadump: sysfs for fadump memory reservation Add a sys interface to allow querying the memory reserved by FADump for saving the crash dump. Also added Documentation/ABI for the new sysfs file. Signed-off-by: Sourabh Jain Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20191211160910.21656-7-sourabhjain@linux.ibm.com --- Documentation/ABI/testing/sysfs-kernel-fadump | 7 +++++++ Documentation/powerpc/firmware-assisted-dump.rst | 5 +++++ 2 files changed, 12 insertions(+) (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-kernel-fadump b/Documentation/ABI/testing/sysfs-kernel-fadump index 5d988b919e81..8f7a64a81783 100644 --- a/Documentation/ABI/testing/sysfs-kernel-fadump +++ b/Documentation/ABI/testing/sysfs-kernel-fadump @@ -31,3 +31,10 @@ Description: write only the system is booted to capture the vmcore using FADump. It is used to release the memory reserved by FADump to save the crash dump. + +What: /sys/kernel/fadump/mem_reserved +Date: Dec 2019 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Provide information about the amount of memory reserved by + FADump to save the crash dump in bytes. diff --git a/Documentation/powerpc/firmware-assisted-dump.rst b/Documentation/powerpc/firmware-assisted-dump.rst index 2cd65a0df9b8..b3f3ee135dbe 100644 --- a/Documentation/powerpc/firmware-assisted-dump.rst +++ b/Documentation/powerpc/firmware-assisted-dump.rst @@ -268,6 +268,11 @@ Here is the list of files under kernel sysfs: be handled and vmcore will not be captured. This interface can be easily integrated with kdump service start/stop. + /sys/kernel/fadump/mem_reserved + + This is used to display the memory reserved by FADump for saving the + crash dump. + /sys/kernel/fadump_release_mem This file is available only when FADump is active during second kernel. This is used to release the reserved memory -- cgit v1.2.3 From e14980976534d9d94f5cddd70033707965482ede Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 18 Feb 2020 21:31:58 +0000 Subject: ASoC: dt-bindings: Make RK3328 codec GPIO explicit Existing RK3328 codec drivers have overloaded the GRF phandle to assume implicit control of the limited-function GPIO_MUTE pin, which is usually used to enable an external audio line driver IC. Since this pin has a proper binding of its own (see gpio/rockchip,rk3328-grf-gpio.txt), make a GPIO explicit in the codec binding too. This will help avoid ambiguity on boards that use that pin for some other purpose. (and while touching the example, enforce the "don't include status" rule) Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/5f7a399dea8a9dedef57f6f99f0f6ab1c1fdc56a.1581376744.git.robin.murphy@arm.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/rockchip,rk3328-codec.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/sound/rockchip,rk3328-codec.txt b/Documentation/devicetree/bindings/sound/rockchip,rk3328-codec.txt index 2469588c7ccb..1ecd75d2032a 100644 --- a/Documentation/devicetree/bindings/sound/rockchip,rk3328-codec.txt +++ b/Documentation/devicetree/bindings/sound/rockchip,rk3328-codec.txt @@ -10,6 +10,11 @@ Required properties: - clock-names: should be "pclk". - spk-depop-time-ms: speak depop time msec. +Optional properties: + +- mute-gpios: GPIO specifier for external line driver control (typically the + dedicated GPIO_MUTE pin) + Example for rk3328 internal codec: codec: codec@ff410000 { @@ -18,6 +23,6 @@ codec: codec@ff410000 { rockchip,grf = <&grf>; clocks = <&cru PCLK_ACODEC>; clock-names = "pclk"; + mute-gpios = <&grf_gpio 0 GPIO_ACTIVE_LOW>; spk-depop-time-ms = 100; - status = "disabled"; }; -- cgit v1.2.3 From db6c122b8fb16a346b87fbcb4987515429e5e7cf Mon Sep 17 00:00:00 2001 From: Lei Wang Date: Mon, 27 Jan 2020 08:23:08 -0800 Subject: dt-bindings: edac: Dmc-520.yaml Add the device tree bindings for the EDAC driver dmc520_edac. Signed-off-by: Lei Wang Signed-off-by: Shiping Ji Signed-off-by: Borislav Petkov Reviewed-by: James Morse Reviewed-by: Rob Herring Link: https://lkml.kernel.org/r/5354a9c3-5b5a-486a-9d19-fa9be169faef@gmail.com --- .../devicetree/bindings/edac/dmc-520.yaml | 59 ++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 Documentation/devicetree/bindings/edac/dmc-520.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/edac/dmc-520.yaml b/Documentation/devicetree/bindings/edac/dmc-520.yaml new file mode 100644 index 000000000000..9272d2bd8634 --- /dev/null +++ b/Documentation/devicetree/bindings/edac/dmc-520.yaml @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/edac/dmc-520.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ARM DMC-520 EDAC bindings + +maintainers: + - Lei Wang + +description: |+ + DMC-520 node is defined to describe DRAM error detection and correction. + + https://static.docs.arm.com/100000/0200/corelink_dmc520_trm_100000_0200_01_en.pdf + +properties: + compatible: + items: + - const: brcm,dmc-520 + - const: arm,dmc-520 + + reg: + maxItems: 1 + + interrupts: + minItems: 1 + maxItems: 10 + + interrupt-names: + minItems: 1 + maxItems: 10 + items: + enum: + - ram_ecc_errc + - ram_ecc_errd + - dram_ecc_errc + - dram_ecc_errd + - failed_access + - failed_prog + - link_err + - temperature_event + - arch_fsm + - phy_request + +required: + - compatible + - reg + - interrupts + - interrupt-names + +examples: + - | + dmc0: dmc@200000 { + compatible = "brcm,dmc-520", "arm,dmc-520"; + reg = <0x200000 0x80000>; + interrupts = <0x0 0x349 0x4>, <0x0 0x34B 0x4>; + interrupt-names = "dram_ecc_errc", "dram_ecc_errd"; + }; -- cgit v1.2.3 From 2f384801a4e0bcca7f5846c346decffed44f0c0b Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 7 Feb 2020 11:03:05 +0100 Subject: dt-bindinsg: net: can: Convert can-transceiver to json-schema Convert can-transceiver property to json-schema Signed-off-by: Benjamin Gaignard Signed-off-by: Rob Herring --- .../bindings/net/can/can-transceiver.txt | 24 ---------------------- .../bindings/net/can/can-transceiver.yaml | 18 ++++++++++++++++ 2 files changed, 18 insertions(+), 24 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/can/can-transceiver.txt create mode 100644 Documentation/devicetree/bindings/net/can/can-transceiver.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/can/can-transceiver.txt b/Documentation/devicetree/bindings/net/can/can-transceiver.txt deleted file mode 100644 index 0011f53ff159..000000000000 --- a/Documentation/devicetree/bindings/net/can/can-transceiver.txt +++ /dev/null @@ -1,24 +0,0 @@ -Generic CAN transceiver Device Tree binding ------------------------------- - -CAN transceiver typically limits the max speed in standard CAN and CAN FD -modes. Typically these limitations are static and the transceivers themselves -provide no way to detect this limitation at runtime. For this situation, -the "can-transceiver" node can be used. - -Required Properties: - max-bitrate: a positive non 0 value that determines the max - speed that CAN/CAN-FD can run. Any other value - will be ignored. - -Examples: - -Based on Texas Instrument's TCAN1042HGV CAN Transceiver - -m_can0 { - .... - can-transceiver { - max-bitrate = <5000000>; - }; - ... -}; diff --git a/Documentation/devicetree/bindings/net/can/can-transceiver.yaml b/Documentation/devicetree/bindings/net/can/can-transceiver.yaml new file mode 100644 index 000000000000..6396977d29e5 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/can-transceiver.yaml @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/can/can-transceiver.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: CAN transceiver Bindings + +description: CAN transceiver generic properties bindings + +maintainers: + - Rob Herring + +properties: + max-bitrate: + $ref: /schemas/types.yaml#/definitions/uint32 + description: a positive non 0 value that determines the max speed that CAN/CAN-FD can run. + minimum: 1 -- cgit v1.2.3 From 824674b59f72e8aed2c6b36e12ab07a2eb54769a Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 7 Feb 2020 11:03:06 +0100 Subject: dt-bindings: net: can: Convert M_CAN to json-schema Convert M_CAN bindings to json-schema Signed-off-by: Benjamin Gaignard Signed-off-by: Rob Herring --- .../devicetree/bindings/net/can/bosch,m_can.yaml | 144 +++++++++++++++++++++ .../devicetree/bindings/net/can/m_can.txt | 75 ----------- 2 files changed, 144 insertions(+), 75 deletions(-) create mode 100644 Documentation/devicetree/bindings/net/can/bosch,m_can.yaml delete mode 100644 Documentation/devicetree/bindings/net/can/m_can.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml new file mode 100644 index 000000000000..cccf8202c8f7 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml @@ -0,0 +1,144 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/can/bosch,m_can.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Bosch MCAN controller Bindings + +description: Bosch MCAN controller for CAN bus + +maintainers: + - Sriram Dash + +properties: + compatible: + const: bosch,m_can + + reg: + items: + - description: M_CAN registers map + - description: message RAM + + reg-names: + items: + - const: m_can + - const: message_ram + + interrupts: + items: + - description: interrupt line0 + - description: interrupt line1 + minItems: 1 + maxItems: 2 + + interrupt-names: + items: + - const: int0 + - const: int1 + minItems: 1 + maxItems: 2 + + clocks: + items: + - description: peripheral clock + - description: bus clock + + clock-names: + items: + - const: hclk + - const: cclk + + bosch,mram-cfg: + description: | + Message RAM configuration data. + Multiple M_CAN instances can share the same Message RAM + and each element(e.g Rx FIFO or Tx Buffer and etc) number + in Message RAM is also configurable, so this property is + telling driver how the shared or private Message RAM are + used by this M_CAN controller. + + The format should be as follows: + + The 'offset' is an address offset of the Message RAM where + the following elements start from. This is usually set to + 0x0 if you're using a private Message RAM. The remain cells + are used to specify how many elements are used for each FIFO/Buffer. + + M_CAN includes the following elements according to user manual: + 11-bit Filter 0-128 elements / 0-128 words + 29-bit Filter 0-64 elements / 0-128 words + Rx FIFO 0 0-64 elements / 0-1152 words + Rx FIFO 1 0-64 elements / 0-1152 words + Rx Buffers 0-64 elements / 0-1152 words + Tx Event FIFO 0-32 elements / 0-64 words + Tx Buffers 0-32 elements / 0-576 words + + Please refer to 2.4.1 Message RAM Configuration in Bosch + M_CAN user manual for details. + allOf: + - $ref: /schemas/types.yaml#/definitions/int32-array + - items: + items: + - description: The 'offset' is an address offset of the Message RAM + where the following elements start from. This is usually + set to 0x0 if you're using a private Message RAM. + default: 0 + - description: 11-bit Filter 0-128 elements / 0-128 words + minimum: 0 + maximum: 128 + - description: 29-bit Filter 0-64 elements / 0-128 words + minimum: 0 + maximum: 64 + - description: Rx FIFO 0 0-64 elements / 0-1152 words + minimum: 0 + maximum: 64 + - description: Rx FIFO 1 0-64 elements / 0-1152 words + minimum: 0 + maximum: 64 + - description: Rx Buffers 0-64 elements / 0-1152 words + minimum: 0 + maximum: 64 + - description: Tx Event FIFO 0-32 elements / 0-64 words + minimum: 0 + maximum: 32 + - description: Tx Buffers 0-32 elements / 0-576 words + minimum: 0 + maximum: 32 + maxItems: 1 + + can-transceiver: + $ref: can-transceiver.yaml# + +required: + - compatible + - reg + - reg-names + - interrupts + - interrupt-names + - clocks + - clock-names + - bosch,mram-cfg + +additionalProperties: false + +examples: + - | + #include + can@20e8000 { + compatible = "bosch,m_can"; + reg = <0x020e8000 0x4000>, <0x02298000 0x4000>; + reg-names = "m_can", "message_ram"; + interrupts = <0 114 0x04>, <0 114 0x04>; + interrupt-names = "int0", "int1"; + clocks = <&clks IMX6SX_CLK_CANFD>, + <&clks IMX6SX_CLK_CANFD>; + clock-names = "hclk", "cclk"; + bosch,mram-cfg = <0x0 0 0 32 0 0 0 1>; + + can-transceiver { + max-bitrate = <5000000>; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/net/can/m_can.txt b/Documentation/devicetree/bindings/net/can/m_can.txt deleted file mode 100644 index ed614383af9c..000000000000 --- a/Documentation/devicetree/bindings/net/can/m_can.txt +++ /dev/null @@ -1,75 +0,0 @@ -Bosch MCAN controller Device Tree Bindings -------------------------------------------------- - -Required properties: -- compatible : Should be "bosch,m_can" for M_CAN controllers -- reg : physical base address and size of the M_CAN - registers map and Message RAM -- reg-names : Should be "m_can" and "message_ram" -- interrupts : Should be the interrupt number of M_CAN interrupt - line 0 and line 1, could be same if sharing - the same interrupt. -- interrupt-names : Should contain "int0" and "int1" -- clocks : Clocks used by controller, should be host clock - and CAN clock. -- clock-names : Should contain "hclk" and "cclk" -- pinctrl- : Pinctrl states as described in bindings/pinctrl/pinctrl-bindings.txt -- pinctrl-names : Names corresponding to the numbered pinctrl states -- bosch,mram-cfg : Message RAM configuration data. - Multiple M_CAN instances can share the same Message - RAM and each element(e.g Rx FIFO or Tx Buffer and etc) - number in Message RAM is also configurable, - so this property is telling driver how the shared or - private Message RAM are used by this M_CAN controller. - - The format should be as follows: - - The 'offset' is an address offset of the Message RAM - where the following elements start from. This is - usually set to 0x0 if you're using a private Message - RAM. The remain cells are used to specify how many - elements are used for each FIFO/Buffer. - - M_CAN includes the following elements according to user manual: - 11-bit Filter 0-128 elements / 0-128 words - 29-bit Filter 0-64 elements / 0-128 words - Rx FIFO 0 0-64 elements / 0-1152 words - Rx FIFO 1 0-64 elements / 0-1152 words - Rx Buffers 0-64 elements / 0-1152 words - Tx Event FIFO 0-32 elements / 0-64 words - Tx Buffers 0-32 elements / 0-576 words - - Please refer to 2.4.1 Message RAM Configuration in - Bosch M_CAN user manual for details. - -Optional Subnode: -- can-transceiver : Can-transceiver subnode describing maximum speed - that can be used for CAN/CAN-FD modes. See - Documentation/devicetree/bindings/net/can/can-transceiver.txt - for details. -Example: -SoC dtsi: -m_can1: can@20e8000 { - compatible = "bosch,m_can"; - reg = <0x020e8000 0x4000>, <0x02298000 0x4000>; - reg-names = "m_can", "message_ram"; - interrupts = <0 114 0x04>, - <0 114 0x04>; - interrupt-names = "int0", "int1"; - clocks = <&clks IMX6SX_CLK_CANFD>, - <&clks IMX6SX_CLK_CANFD>; - clock-names = "hclk", "cclk"; - bosch,mram-cfg = <0x0 0 0 32 0 0 0 1>; -}; - -Board dts: -&m_can1 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_m_can1>; - status = "enabled"; - - can-transceiver { - max-bitrate = <5000000>; - }; -}; -- cgit v1.2.3 From 1c27e9ae0f69f1947a95b01ab21526204ed6a24c Mon Sep 17 00:00:00 2001 From: Bastian Germann Date: Fri, 14 Feb 2020 12:10:01 +0100 Subject: dt-bindings: Add vendor prefix for Linutronix Add a vendor prefix for Linutronix GmbH. Website: https://linutronix.de/ Co-developed-by: Benedikt Spranger Signed-off-by: Benedikt Spranger Signed-off-by: Bastian Germann Acked-by: Maxime Ripard Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index ac6aa3332b28..9260980bc83a 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -547,6 +547,8 @@ patternProperties: description: LinkSprite Technologies, Inc. "^linksys,.*": description: Belkin International, Inc. (Linksys) + "^linutronix,.*": + description: Linutronix GmbH "^linux,.*": description: Linux-specific binding "^linx,.*": -- cgit v1.2.3 From 5c36abcd2621adc3d50d05628f0ef0be6e7840a9 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 19 Feb 2020 18:35:02 +0100 Subject: ASoC: meson: add t9015 internal codec binding documentation Add the DT binding documention of the internal DAC found in the Amlogic gxl, g12a and sm1 SoC family. Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20200219173503.1112561-2-jbrunet@baylibre.com Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/amlogic,t9015.yaml | 58 ++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/amlogic,t9015.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/sound/amlogic,t9015.yaml b/Documentation/devicetree/bindings/sound/amlogic,t9015.yaml new file mode 100644 index 000000000000..b7c38c2b5b54 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/amlogic,t9015.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sound/amlogic,t9015.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Amlogic T9015 Internal Audio DAC + +maintainers: + - Jerome Brunet + +properties: + $nodename: + pattern: "^audio-controller@.*" + + "#sound-dai-cells": + const: 0 + + compatible: + items: + - const: amlogic,t9015 + + clocks: + items: + - description: Peripheral clock + + clock-names: + items: + - const: pclk + + reg: + maxItems: 1 + + resets: + maxItems: 1 + +required: + - "#sound-dai-cells" + - compatible + - reg + - clocks + - clock-names + - resets + +examples: + - | + #include + #include + + acodec: audio-controller@32000 { + compatible = "amlogic,t9015"; + reg = <0x0 0x32000 0x0 0x14>; + #sound-dai-cells = <0>; + clocks = <&clkc CLKID_AUDIO_CODEC>; + clock-names = "pclk"; + resets = <&reset RESET_AUDIO_CODEC>; + }; + -- cgit v1.2.3 From 95e9e205fcbe34d003c558e0a98e6ae6f9ab3a61 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Fri, 7 Feb 2020 13:03:45 +0100 Subject: ASoC: dt-bindings: stm32: convert i2s to json-schema Convert the STM32 I2S bindings to DT schema format using json-schema. Signed-off-by: Olivier Moysan Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200207120345.24672-1-olivier.moysan@st.com Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/st,stm32-i2s.txt | 62 --------------- .../devicetree/bindings/sound/st,stm32-i2s.yaml | 87 ++++++++++++++++++++++ 2 files changed, 87 insertions(+), 62 deletions(-) delete mode 100644 Documentation/devicetree/bindings/sound/st,stm32-i2s.txt create mode 100644 Documentation/devicetree/bindings/sound/st,stm32-i2s.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/sound/st,stm32-i2s.txt b/Documentation/devicetree/bindings/sound/st,stm32-i2s.txt deleted file mode 100644 index cbf24bcd1b8d..000000000000 --- a/Documentation/devicetree/bindings/sound/st,stm32-i2s.txt +++ /dev/null @@ -1,62 +0,0 @@ -STMicroelectronics STM32 SPI/I2S Controller - -The SPI/I2S block supports I2S/PCM protocols when configured on I2S mode. -Only some SPI instances support I2S. - -Required properties: - - compatible: Must be "st,stm32h7-i2s" - - reg: Offset and length of the device's register set. - - interrupts: Must contain the interrupt line id. - - clocks: Must contain phandle and clock specifier pairs for each entry - in clock-names. - - clock-names: Must contain "i2sclk", "pclk", "x8k" and "x11k". - "i2sclk": clock which feeds the internal clock generator - "pclk": clock which feeds the peripheral bus interface - "x8k": I2S parent clock for sampling rates multiple of 8kHz. - "x11k": I2S parent clock for sampling rates multiple of 11.025kHz. - - dmas: DMA specifiers for tx and rx dma. - See Documentation/devicetree/bindings/dma/stm32-dma.txt. - - dma-names: Identifier for each DMA request line. Must be "tx" and "rx". - - pinctrl-names: should contain only value "default" - - pinctrl-0: see Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml - -Optional properties: - - resets: Reference to a reset controller asserting the reset controller - -The device node should contain one 'port' child node with one child 'endpoint' -node, according to the bindings defined in Documentation/devicetree/bindings/ -graph.txt. - -Example: -sound_card { - compatible = "audio-graph-card"; - dais = <&i2s2_port>; -}; - -i2s2: audio-controller@40003800 { - compatible = "st,stm32h7-i2s"; - reg = <0x40003800 0x400>; - interrupts = <36>; - clocks = <&rcc PCLK1>, <&rcc SPI2_CK>, <&rcc PLL1_Q>, <&rcc PLL2_P>; - clock-names = "pclk", "i2sclk", "x8k", "x11k"; - dmas = <&dmamux2 2 39 0x400 0x1>, - <&dmamux2 3 40 0x400 0x1>; - dma-names = "rx", "tx"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_i2s2>; - - i2s2_port: port@0 { - cpu_endpoint: endpoint { - remote-endpoint = <&codec_endpoint>; - format = "i2s"; - }; - }; -}; - -audio-codec { - codec_port: port@0 { - codec_endpoint: endpoint { - remote-endpoint = <&cpu_endpoint>; - }; - }; -}; diff --git a/Documentation/devicetree/bindings/sound/st,stm32-i2s.yaml b/Documentation/devicetree/bindings/sound/st,stm32-i2s.yaml new file mode 100644 index 000000000000..f32410890589 --- /dev/null +++ b/Documentation/devicetree/bindings/sound/st,stm32-i2s.yaml @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sound/st,stm32-i2s.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: STMicroelectronics STM32 SPI/I2S Controller + +maintainers: + - Olivier Moysan + +description: + The SPI/I2S block supports I2S/PCM protocols when configured on I2S mode. + Only some SPI instances support I2S. + +properties: + compatible: + enum: + - st,stm32h7-i2s + + "#sound-dai-cells": + const: 0 + + reg: + maxItems: 1 + + clocks: + items: + - description: clock feeding the peripheral bus interface. + - description: clock feeding the internal clock generator. + - description: I2S parent clock for sampling rates multiple of 8kHz. + - description: I2S parent clock for sampling rates multiple of 11.025kHz. + + clock-names: + items: + - const: pclk + - const: i2sclk + - const: x8k + - const: x11k + + interrupts: + maxItems: 1 + + dmas: + items: + - description: audio capture DMA. + - description: audio playback DMA. + + dma-names: + items: + - const: rx + - const: tx + + resets: + maxItems: 1 + +required: + - compatible + - "#sound-dai-cells" + - reg + - clocks + - clock-names + - interrupts + - dmas + - dma-names + +additionalProperties: false + +examples: + - | + #include + #include + i2s2: audio-controller@4000b000 { + compatible = "st,stm32h7-i2s"; + #sound-dai-cells = <0>; + reg = <0x4000b000 0x400>; + clocks = <&rcc SPI2>, <&rcc SPI2_K>, <&rcc PLL3_Q>, <&rcc PLL3_R>; + clock-names = "pclk", "i2sclk", "x8k", "x11k"; + interrupts = ; + dmas = <&dmamux1 39 0x400 0x01>, + <&dmamux1 40 0x400 0x01>; + dma-names = "rx", "tx"; + pinctrl-names = "default"; + pinctrl-0 = <&i2s2_pins_a>; + }; + +... -- cgit v1.2.3 From a153182716c0dfbe6d929f32e89a3a2c3399c4b2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 10 Feb 2020 15:04:16 +0100 Subject: dt-bindings: usb: exynos-usb: Document clock names for DWC3 bindings The Exynos DWC3 driver expects certain clock names, depending on used compatible. Document this explicitly in the bindings. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/usb/exynos-usb.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/usb/exynos-usb.txt b/Documentation/devicetree/bindings/usb/exynos-usb.txt index 66c394f9e11f..6aae1544f240 100644 --- a/Documentation/devicetree/bindings/usb/exynos-usb.txt +++ b/Documentation/devicetree/bindings/usb/exynos-usb.txt @@ -78,7 +78,14 @@ Required properties: - ranges: allows valid 1:1 translation between child's address space and parent's address space - clocks: Clock IDs array as required by the controller. - - clock-names: names of clocks correseponding to IDs in the clock property + - clock-names: Names of clocks corresponding to IDs in the clock property. + Following clock names shall be provided for different + compatibles: + - samsung,exynos5250-dwusb3: "usbdrd30", + - samsung,exynos5433-dwusb3: "aclk", "susp_clk", "pipe_pclk", + "phyclk", + - samsung,exynos7-dwusb3: "usbdrd30", "usbdrd30_susp_clk", + "usbdrd30_axius_clk" - vdd10-supply: 1.0V powr supply - vdd33-supply: 3.0V/3.3V power supply -- cgit v1.2.3 From 94d57ebd233a39d4d28fcd4de13a092146da3f4f Mon Sep 17 00:00:00 2001 From: Alexey Minnekhanov Date: Wed, 12 Feb 2020 20:09:12 +0300 Subject: dt-bindings: Add vendor prefix for Xiaomi Xiaomi Corporation is a Chinese electronics company. Signed-off-by: Alexey Minnekhanov Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 9260980bc83a..a97601e0e537 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -1086,6 +1086,8 @@ patternProperties: description: X-Powers "^xes,.*": description: Extreme Engineering Solutions (X-ES) + "^xiaomi,.*": + description: Xiaomi Technology Co., Ltd. "^xillybus,.*": description: Xillybus Ltd. "^xinpeng,.*": -- cgit v1.2.3 From faf8e30acb219849725aa75302d36e0ffdb6a258 Mon Sep 17 00:00:00 2001 From: Alexey Minnekhanov Date: Wed, 12 Feb 2020 20:09:13 +0300 Subject: dt-bindings: arm: Add kryo260 compatible Kryo260 is found in SDM660, so add it in list of cpu compatibles Signed-off-by: Alexey Minnekhanov Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/arm/cpus.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml index 7a9c3ce2dbef..765af3cbd11f 100644 --- a/Documentation/devicetree/bindings/arm/cpus.yaml +++ b/Documentation/devicetree/bindings/arm/cpus.yaml @@ -155,6 +155,7 @@ properties: - nvidia,tegra194-carmel - qcom,krait - qcom,kryo + - qcom,kryo260 - qcom,kryo385 - qcom,kryo485 - qcom,scorpion -- cgit v1.2.3 From 3c87402771f2bc38ee92eb506bb58e4fc4a5b8b9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 16 Feb 2020 20:54:21 -0800 Subject: Documentation: power: fix pm_qos_interface.rst format warning Fix Sphinx warnings by indenting the bullet list (and making it unnumbered). Documentation/power/pm_qos_interface.rst:12: WARNING: Unexpected indentation. Signed-off-by: Randy Dunlap Signed-off-by: Rafael J. Wysocki --- Documentation/power/pm_qos_interface.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/power/pm_qos_interface.rst b/Documentation/power/pm_qos_interface.rst index 064f668fbdab..69b0fe3e2542 100644 --- a/Documentation/power/pm_qos_interface.rst +++ b/Documentation/power/pm_qos_interface.rst @@ -7,8 +7,8 @@ performance expectations by drivers, subsystems and user space applications on one of the parameters. Two different PM QoS frameworks are available: -1. CPU latency QoS. -2. The per-device PM QoS framework provides the API to manage the + * CPU latency QoS. + * The per-device PM QoS framework provides the API to manage the per-device latency constraints and PM QoS flags. The latency unit used in the PM QoS framework is the microsecond (usec). -- cgit v1.2.3 From 612e31e01d8e8b9fd2b803c355f74c9d934f461e Mon Sep 17 00:00:00 2001 From: Andrey Lebedev Date: Wed, 19 Feb 2020 20:08:57 +0200 Subject: dt-bindings: display: sun4i: New compatibles for A20 tcons Document new compatibles used to differentiate between timing controllers on A20 (sun7i) Signed-off-by: Andrey Lebedev Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200219180858.4806-5-andrey.lebedev@gmail.com --- .../devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml index 86ad617d2327..bbd500b024d3 100644 --- a/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml +++ b/Documentation/devicetree/bindings/display/allwinner,sun4i-a10-tcon.yaml @@ -34,6 +34,12 @@ properties: - const: allwinner,sun9i-a80-tcon-lcd - const: allwinner,sun9i-a80-tcon-tv + - items: + - enum: + - allwinner,sun7i-a20-tcon0 + - allwinner,sun7i-a20-tcon1 + - const: allwinner,sun7i-a20-tcon + - items: - enum: - allwinner,sun50i-a64-tcon-lcd -- cgit v1.2.3 From 6650cdd9a8ccf00555dbbe743d58541ad8feb6a7 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Sun, 26 Jan 2020 12:05:35 -0800 Subject: x86/split_lock: Enable split lock detection by kernel A split-lock occurs when an atomic instruction operates on data that spans two cache lines. In order to maintain atomicity the core takes a global bus lock. This is typically >1000 cycles slower than an atomic operation within a cache line. It also disrupts performance on other cores (which must wait for the bus lock to be released before their memory operations can complete). For real-time systems this may mean missing deadlines. For other systems it may just be very annoying. Some CPUs have the capability to raise an #AC trap when a split lock is attempted. Provide a command line option to give the user choices on how to handle this: split_lock_detect= off - not enabled (no traps for split locks) warn - warn once when an application does a split lock, but allow it to continue running. fatal - Send SIGBUS to applications that cause split lock On systems that support split lock detection the default is "warn". Note that if the kernel hits a split lock in any mode other than "off" it will OOPs. One implementation wrinkle is that the MSR to control the split lock detection is per-core, not per thread. This might result in some short lived races on HT systems in "warn" mode if Linux tries to enable on one thread while disabling on the other. Race analysis by Sean Christopherson: - Toggling of split-lock is only done in "warn" mode. Worst case scenario of a race is that a misbehaving task will generate multiple #AC exceptions on the same instruction. And this race will only occur if both siblings are running tasks that generate split-lock #ACs, e.g. a race where sibling threads are writing different values will only occur if CPUx is disabling split-lock after an #AC and CPUy is re-enabling split-lock after *its* previous task generated an #AC. - Transitioning between off/warn/fatal modes at runtime isn't supported and disabling is tracked per task, so hardware will always reach a steady state that matches the configured mode. I.e. split-lock is guaranteed to be enabled in hardware once all _TIF_SLD threads have been scheduled out. Signed-off-by: Peter Zijlstra (Intel) Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu Co-developed-by: Tony Luck Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20200126200535.GB30377@agluck-desk2.amr.corp.intel.com --- Documentation/admin-guide/kernel-parameters.txt | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dbc22d684627..62c2b0b6922e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4655,6 +4655,28 @@ spia_pedr= spia_peddr= + split_lock_detect= + [X86] Enable split lock detection + + When enabled (and if hardware support is present), atomic + instructions that access data across cache line + boundaries will result in an alignment check exception. + + off - not enabled + + warn - the kernel will emit rate limited warnings + about applications triggering the #AC + exception. This mode is the default on CPUs + that supports split lock detection. + + fatal - the kernel will send SIGBUS to applications + that trigger the #AC exception. + + If an #AC exception is hit in the kernel or in + firmware (i.e. not while executing in user mode) + the kernel will oops in either "warn" or "fatal" + mode. + srcutree.counter_wrap_check [KNL] Specifies how frequently to check for grace-period sequence counter wrap for the -- cgit v1.2.3 From 4ee67cbd97668ab1b17d86d85348302c0b7490cd Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Thu, 20 Feb 2020 15:07:58 -0600 Subject: dt-bindings: sound: Add TLV320ADCx140 dt bindings Add dt bindings for the TLV320ADCx140 Burr-Brown ADC. The initial support is for the following: TLV320ADC3140 - http://www.ti.com/lit/gpn/tlv320adc3140 TLV320ADC5140 - http://www.ti.com/lit/gpn/tlv320adc5140 TLV320ADC6140 - http://www.ti.com/lit/gpn/tlv320adc6140 Signed-off-by: Dan Murphy Link: https://lore.kernel.org/r/20200220210759.31466-2-dmurphy@ti.com Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/tlv320adcx140.yaml | 83 ++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/tlv320adcx140.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/sound/tlv320adcx140.yaml b/Documentation/devicetree/bindings/sound/tlv320adcx140.yaml new file mode 100644 index 000000000000..1433ff62b14f --- /dev/null +++ b/Documentation/devicetree/bindings/sound/tlv320adcx140.yaml @@ -0,0 +1,83 @@ +# SPDX-License-Identifier: (GPL-2.0+ OR BSD-2-Clause) +# Copyright (C) 2019 Texas Instruments Incorporated +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sound/tlv320adcx140.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments TLV320ADCX140 Quad Channel Analog-to-Digital Converter + +maintainers: + - Dan Murphy + +description: | + The TLV320ADCX140 are multichannel (4-ch analog recording or 8-ch digital + PDM microphones recording), high-performance audio, analog-to-digital + converter (ADC) with analog inputs supporting up to 2V RMS. The TLV320ADCX140 + family supports line and microphone Inputs, and offers a programmable + microphone bias or supply voltage generation. + + Specifications can be found at: + http://www.ti.com/lit/ds/symlink/tlv320adc3140.pdf + http://www.ti.com/lit/ds/symlink/tlv320adc5140.pdf + http://www.ti.com/lit/ds/symlink/tlv320adc6140.pdf + +properties: + compatible: + oneOf: + - const: ti,tlv320adc3140 + - const: ti,tlv320adc5140 + - const: ti,tlv320adc6140 + + reg: + maxItems: 1 + description: | + I2C addresss of the device can be one of these 0x4c, 0x4d, 0x4e or 0x4f + + reset-gpios: + description: | + GPIO used for hardware reset. + + areg-supply: + description: | + Regulator with AVDD at 3.3V. If not defined then the internal regulator + is enabled. + + ti,mic-bias-source: + description: | + Indicates the source for MIC Bias. + 0 - Mic bias is set to VREF + 1 - Mic bias is set to VREF × 1.096 + 6 - Mic bias is set to AVDD + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - enum: [0, 1, 6] + + ti,vref-source: + description: | + Indicates the source for MIC Bias. + 0 - Set VREF to 2.75V + 1 - Set VREF to 2.5V + 2 - Set VREF to 1.375V + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - enum: [0, 1, 2] + +required: + - compatible + - reg + +examples: + - | + #include + i2c0 { + #address-cells = <1>; + #size-cells = <0>; + codec: codec@4c { + compatible = "ti,tlv320adc5140"; + reg = <0x4c>; + ti,use-internal-areg; + ti,mic-bias-source = <6>; + reset-gpios = <&gpio0 14 GPIO_ACTIVE_HIGH>; + }; + }; -- cgit v1.2.3 From b2b00ddf193bf83dc561d965c67b18eb54ebcd83 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 30 Oct 2019 11:56:10 -0700 Subject: rcu: React to callback overload by aggressively seeking quiescent states In default configutions, RCU currently waits at least 100 milliseconds before asking cond_resched() and/or resched_rcu() for help seeking quiescent states to end a grace period. But 100 milliseconds can be one good long time during an RCU callback flood, for example, as can happen when user processes repeatedly open and close files in a tight loop. These 100-millisecond gaps in successive grace periods during a callback flood can result in excessive numbers of callbacks piling up, unnecessarily increasing memory footprint. This commit therefore asks cond_resched() and/or resched_rcu() for help as early as the first FQS scan when at least one of the CPUs has more than 20,000 callbacks queued, a number that can be changed using the new rcutree.qovld kernel boot parameter. An auxiliary qovld_calc variable is used to avoid acquisition of locks that have not yet been initialized. Early tests indicate that this reduces the RCU-callback memory footprint during rcutorture floods by from 50% to 4x, depending on configuration. Reported-by: Joel Fernandes (Google) Reported-by: Tejun Heo [ paulmck: Fix bug located by Qian Cai. ] Signed-off-by: Paul E. McKenney Tested-by: Dexuan Cui Tested-by: Qian Cai --- Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dbc22d684627..dbd4b4a65209 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3980,6 +3980,15 @@ Set threshold of queued RCU callbacks below which batch limiting is re-enabled. + rcutree.qovld= [KNL] + Set threshold of queued RCU callbacks beyond which + RCU's force-quiescent-state scan will aggressively + enlist help from cond_resched() and sched IPIs to + help CPUs more quickly reach quiescent states. + Set to less than zero to make this be set based + on rcutree.qhimark at boot time and to zero to + disable more aggressive help enlistment. + rcutree.rcu_idle_gp_delay= [KNL] Set wakeup interval for idle CPUs that have RCU callbacks (RCU_FAST_NO_HZ=y). -- cgit v1.2.3 From 58c53360b36d2077cbb843e7ad2bf75f0498271c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Dec 2019 11:29:01 -0800 Subject: rcutorture: Allow boottime stall warnings to be suppressed In normal production, an RCU CPU stall warning at boottime is often just as bad as at any other time. In fact, given the desire for fast boot, any sort of long-term stall at boot is a bad idea. However, heavy rcutorture testing on large hyperthreaded systems can generate boottime RCU CPU stalls as a matter of course. This commit therefore provides a kernel boot parameter that suppresses reporting of boottime RCU CPU stall warnings and similarly of rcutorture writer stalls. Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dbc22d684627..ee007b5c874f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4195,6 +4195,12 @@ rcupdate.rcu_cpu_stall_suppress= [KNL] Suppress RCU CPU stall warning messages. + rcupdate.rcu_cpu_stall_suppress_at_boot= [KNL] + Suppress RCU CPU stall warning messages and + rcutorture writer stall warnings that occur + during early boot, that is, during the time + before the init task is spawned. + rcupdate.rcu_cpu_stall_timeout= [KNL] Set timeout for RCU CPU stall warning messages. -- cgit v1.2.3 From 8171d3e0dafd37a9c833904e5a936f4154a1e95b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 6 Dec 2019 15:02:59 -0800 Subject: torture: Allow disabling of boottime CPU-hotplug torture operations In theory, RCU-hotplug operations are supposed to work as soon as there is more than one CPU online. However, in practice, in normal production there is no way to make them happen until userspace is up and running. Besides which, on smaller systems, rcutorture doesn't start doing hotplug operations until 30 seconds after the start of boot, which on most systems also means the better part of 30 seconds after the end of boot. This commit therefore provides a new torture.disable_onoff_at_boot kernel boot parameter that suppresses CPU-hotplug torture operations until about the time that init is spawned. Of course, if you know of a need for boottime CPU-hotplug operations, then you should avoid passing this argument to any of the torture tests. You might also want to look at the splats linked to below. Link: https://lore.kernel.org/lkml/20191206185208.GA25636@paulmck-ThinkPad-P72/ Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ee007b5c874f..868f59a48580 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4873,6 +4873,10 @@ topology updates sent by the hypervisor to this LPAR. + torture.disable_onoff_at_boot= [KNL] + Prevent the CPU-hotplug component of torturing + until after init has spawned. + tp720= [HW,PS2] tpm_suspend_pcr=[HW,TPM] -- cgit v1.2.3 From e42da4c62abb547d9c9138e0e7fcd1f36057b5e8 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 20 Feb 2020 16:43:54 -0800 Subject: docs/bpf: Update bpf development Q/A file bpf now has its own mailing list bpf@vger.kernel.org. Update the bpf_devel_QA.rst file to reflect this. Also llvm has switch to github with llvm and clang in the same repo https://github.com/llvm/llvm-project.git. Update the QA file with newer build instructions. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200221004354.930952-1-yhs@fb.com --- Documentation/bpf/bpf_devel_QA.rst | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'Documentation') diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst index c9856b927055..38c15c6fcb14 100644 --- a/Documentation/bpf/bpf_devel_QA.rst +++ b/Documentation/bpf/bpf_devel_QA.rst @@ -20,11 +20,11 @@ Reporting bugs Q: How do I report bugs for BPF kernel code? -------------------------------------------- A: Since all BPF kernel development as well as bpftool and iproute2 BPF -loader development happens through the netdev kernel mailing list, +loader development happens through the bpf kernel mailing list, please report any found issues around BPF to the following mailing list: - netdev@vger.kernel.org + bpf@vger.kernel.org This may also include issues related to XDP, BPF tracing, etc. @@ -46,17 +46,12 @@ Submitting patches Q: To which mailing list do I need to submit my BPF patches? ------------------------------------------------------------ -A: Please submit your BPF patches to the netdev kernel mailing list: +A: Please submit your BPF patches to the bpf kernel mailing list: - netdev@vger.kernel.org - -Historically, BPF came out of networking and has always been maintained -by the kernel networking community. Although these days BPF touches -many other subsystems as well, the patches are still routed mainly -through the networking community. + bpf@vger.kernel.org In case your patch has changes in various different subsystems (e.g. -tracing, security, etc), make sure to Cc the related kernel mailing +networking, tracing, security, etc), make sure to Cc the related kernel mailing lists and maintainers from there as well, so they are able to review the changes and provide their Acked-by's to the patches. @@ -168,7 +163,7 @@ a BPF point of view. Be aware that this is not a final verdict that the patch will automatically get accepted into net or net-next trees eventually: -On the netdev kernel mailing list reviews can come in at any point +On the bpf kernel mailing list reviews can come in at any point in time. If discussions around a patch conclude that they cannot get included as-is, we will either apply a follow-up fix or drop them from the trees entirely. Therefore, we also reserve to rebase @@ -494,15 +489,15 @@ A: You need cmake and gcc-c++ as build requisites for LLVM. Once you have that set up, proceed with building the latest LLVM and clang version from the git repositories:: - $ git clone http://llvm.org/git/llvm.git - $ cd llvm/tools - $ git clone --depth 1 http://llvm.org/git/clang.git - $ cd ..; mkdir build; cd build - $ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86" \ + $ git clone https://github.com/llvm/llvm-project.git + $ mkdir -p llvm-project/llvm/build/install + $ cd llvm-project/llvm/build + $ cmake .. -G "Ninja" -DLLVM_TARGETS_TO_BUILD="BPF;X86" \ + -DLLVM_ENABLE_PROJECTS="clang" \ -DBUILD_SHARED_LIBS=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DLLVM_BUILD_RUNTIME=OFF - $ make -j $(getconf _NPROCESSORS_ONLN) + $ ninja The built binaries can then be found in the build/bin/ directory, where you can point the PATH variable to. -- cgit v1.2.3 From 5be8badcb64be4c6ac5b0e8b882eca8eb175ec2d Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Thu, 7 Nov 2019 20:12:18 +1030 Subject: dt-bindings: fttmr010: Add ast2600 compatible The ast2600 contains a fttmr010 derivative. Reviewed-by: Linus Walleij Acked-by: Rob Herring Signed-off-by: Joel Stanley Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20191107094218.13210-5-joel@jms.id.au --- Documentation/devicetree/bindings/timer/faraday,fttmr010.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt b/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt index 195792270414..3cb2f4c98d64 100644 --- a/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt +++ b/Documentation/devicetree/bindings/timer/faraday,fttmr010.txt @@ -11,6 +11,7 @@ Required properties: "moxa,moxart-timer", "faraday,fttmr010" "aspeed,ast2400-timer" "aspeed,ast2500-timer" + "aspeed,ast2600-timer" - reg : Should contain registers location and length - interrupts : Should contain the three timer interrupts usually with -- cgit v1.2.3 From 6e256a7932930ec5dedb0eb3a8ef057eaa2b8ee9 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 6 Jan 2020 00:18:19 -0800 Subject: dt-bindings: phy-qcom-qmp: Add SDM845 PCIe to binding Add the compatible and define necessary clocks and resets for the SDM845 GEN2 QMP PCIe phy and GEN3 QHP PCIe phy. Reviewed-by: Rob Herring Reviewed-by: Vinod Koul Signed-off-by: Bjorn Andersson Signed-off-by: Kishon Vijay Abraham I --- Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt index eac9ad3cbbc8..a214ce6d0db2 100644 --- a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt +++ b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt @@ -12,6 +12,8 @@ Required properties: "qcom,msm8998-qmp-usb3-phy" for USB3 QMP V3 phy on msm8998, "qcom,msm8998-qmp-ufs-phy" for UFS QMP phy on msm8998, "qcom,msm8998-qmp-pcie-phy" for PCIe QMP phy on msm8998, + "qcom,sdm845-qhp-pcie-phy" for QHP PCIe phy on sdm845, + "qcom,sdm845-qmp-pcie-phy" for QMP PCIe phy on sdm845, "qcom,sdm845-qmp-usb3-phy" for USB3 QMP V3 phy on sdm845, "qcom,sdm845-qmp-usb3-uni-phy" for USB3 QMP V3 UNI phy on sdm845, "qcom,sdm845-qmp-ufs-phy" for UFS QMP phy on sdm845, @@ -52,6 +54,10 @@ Required properties: "ref", "ref_aux". For "qcom,msm8998-qmp-pcie-phy" must contain: "aux", "cfg_ahb", "ref". + For "qcom,sdm845-qhp-pcie-phy" must contain: + "aux", "cfg_ahb", "ref", "refgen". + For "qcom,sdm845-qmp-pcie-phy" must contain: + "aux", "cfg_ahb", "ref", "refgen". For "qcom,sdm845-qmp-usb3-phy" must contain: "aux", "cfg_ahb", "ref", "com_aux". For "qcom,sdm845-qmp-usb3-uni-phy" must contain: @@ -80,6 +86,10 @@ Required properties: "ufsphy". For "qcom,msm8998-qmp-pcie-phy" must contain: "phy", "common". + For "qcom,sdm845-qhp-pcie-phy" must contain: + "phy". + For "qcom,sdm845-qmp-pcie-phy" must contain: + "phy". For "qcom,sdm845-qmp-usb3-phy" must contain: "phy", "common". For "qcom,sdm845-qmp-usb3-uni-phy" must contain: -- cgit v1.2.3 From 3edeb49525bbe34441618fdc5912b1ec37b2a61c Mon Sep 17 00:00:00 2001 From: Hou Zhiqiang Date: Thu, 13 Feb 2020 12:06:41 +0800 Subject: dt-bindings: PCI: Add NXP Layerscape SoCs PCIe Gen4 controller Add PCIe Gen4 controller DT bindings of NXP Layerscape SoCs. Signed-off-by: Hou Zhiqiang Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../bindings/pci/layerscape-pcie-gen4.txt | 52 ++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 Documentation/devicetree/bindings/pci/layerscape-pcie-gen4.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/pci/layerscape-pcie-gen4.txt b/Documentation/devicetree/bindings/pci/layerscape-pcie-gen4.txt new file mode 100644 index 000000000000..b40fb5d15d3d --- /dev/null +++ b/Documentation/devicetree/bindings/pci/layerscape-pcie-gen4.txt @@ -0,0 +1,52 @@ +NXP Layerscape PCIe Gen4 controller + +This PCIe controller is based on the Mobiveil PCIe IP and thus inherits all +the common properties defined in mobiveil-pcie.txt. + +Required properties: +- compatible: should contain the platform identifier such as: + "fsl,lx2160a-pcie" +- reg: base addresses and lengths of the PCIe controller register blocks. + "csr_axi_slave": Bridge config registers + "config_axi_slave": PCIe controller registers +- interrupts: A list of interrupt outputs of the controller. Must contain an + entry for each entry in the interrupt-names property. +- interrupt-names: It could include the following entries: + "intr": The interrupt that is asserted for controller interrupts + "aer": Asserted for aer interrupt when chip support the aer interrupt with + none MSI/MSI-X/INTx mode,but there is interrupt line for aer. + "pme": Asserted for pme interrupt when chip support the pme interrupt with + none MSI/MSI-X/INTx mode,but there is interrupt line for pme. +- dma-coherent: Indicates that the hardware IP block can ensure the coherency + of the data transferred from/to the IP block. This can avoid the software + cache flush/invalid actions, and improve the performance significantly. +- msi-parent : See the generic MSI binding described in + Documentation/devicetree/bindings/interrupt-controller/msi.txt. + +Example: + + pcie@3400000 { + compatible = "fsl,lx2160a-pcie"; + reg = <0x00 0x03400000 0x0 0x00100000 /* controller registers */ + 0x80 0x00000000 0x0 0x00001000>; /* configuration space */ + reg-names = "csr_axi_slave", "config_axi_slave"; + interrupts = , /* AER interrupt */ + , /* PME interrupt */ + ; /* controller interrupt */ + interrupt-names = "aer", "pme", "intr"; + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + apio-wins = <8>; + ppio-wins = <8>; + dma-coherent; + bus-range = <0x0 0xff>; + msi-parent = <&its>; + ranges = <0x82000000 0x0 0x40000000 0x80 0x40000000 0x0 0x40000000>; + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 0 7>; + interrupt-map = <0000 0 0 1 &gic 0 0 GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>, + <0000 0 0 2 &gic 0 0 GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>, + <0000 0 0 3 &gic 0 0 GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>, + <0000 0 0 4 &gic 0 0 GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>; + }; -- cgit v1.2.3 From 62209c9ad2ac29431e91392afb0bd6332ae4c33e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Thu, 20 Feb 2020 21:57:09 +0100 Subject: ASoC: meson: aiu: Document Meson8 and Meson8b support in the dt-bindings The AIU audio output controller on the Meson8 and Meson8b SoC families is compatible with the one found in the GXBB family. Document the compatible string for these two older SoCs. Signed-off-by: Martin Blumenstingl Reviewed-by: Jerome Brunet Link: https://lore.kernel.org/r/20200220205711.77953-2-martin.blumenstingl@googlemail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/amlogic,aiu.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/sound/amlogic,aiu.yaml b/Documentation/devicetree/bindings/sound/amlogic,aiu.yaml index 3ef7632dcb59..a61bccf915d8 100644 --- a/Documentation/devicetree/bindings/sound/amlogic,aiu.yaml +++ b/Documentation/devicetree/bindings/sound/amlogic,aiu.yaml @@ -21,6 +21,8 @@ properties: - enum: - amlogic,aiu-gxbb - amlogic,aiu-gxl + - amlogic,aiu-meson8 + - amlogic,aiu-meson8b - const: amlogic,aiu -- cgit v1.2.3 From b3a3740c35d6072e66eaa3a3bebaa9b70f566fda Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 18 Feb 2020 15:51:37 +0800 Subject: dt-bindings: pinctrl: Convert i.MX8MQ to json-schema Convert the i.MX8MQ pinctrl binding to DT schema format using json-schema Signed-off-by: Anson Huang Link: https://lore.kernel.org/r/1582012300-30260-1-git-send-email-Anson.Huang@nxp.com Reviewed-by: Rob Herring Signed-off-by: Linus Walleij --- .../bindings/pinctrl/fsl,imx8mq-pinctrl.txt | 36 ---------- .../bindings/pinctrl/fsl,imx8mq-pinctrl.yaml | 82 ++++++++++++++++++++++ 2 files changed, 82 insertions(+), 36 deletions(-) delete mode 100644 Documentation/devicetree/bindings/pinctrl/fsl,imx8mq-pinctrl.txt create mode 100644 Documentation/devicetree/bindings/pinctrl/fsl,imx8mq-pinctrl.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mq-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mq-pinctrl.txt deleted file mode 100644 index 66de75090458..000000000000 --- a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mq-pinctrl.txt +++ /dev/null @@ -1,36 +0,0 @@ -* Freescale IMX8MQ IOMUX Controller - -Please refer to fsl,imx-pinctrl.txt and pinctrl-bindings.txt in this directory -for common binding part and usage. - -Required properties: -- compatible: "fsl,imx8mq-iomuxc" -- reg: should contain the base physical address and size of the iomuxc - registers. - -Required properties in sub-nodes: -- fsl,pins: each entry consists of 6 integers and represents the mux and config - setting for one pin. The first 5 integers are specified using a PIN_FUNC_ID macro, which can be found in - imx8mq-pinfunc.h under device tree source folder. The last integer CONFIG is - the pad setting value like pull-up on this pin. Please refer to i.MX8M Quad - Reference Manual for detailed CONFIG settings. - -Examples: - -&uart1 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart1>; -}; - -iomuxc: pinctrl@30330000 { - compatible = "fsl,imx8mq-iomuxc"; - reg = <0x0 0x30330000 0x0 0x10000>; - - pinctrl_uart1: uart1grp { - fsl,pins = < - MX8MQ_IOMUXC_UART1_RXD_UART1_DCE_RX 0x49 - MX8MQ_IOMUXC_UART1_TXD_UART1_DCE_TX 0x49 - >; - }; -}; diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mq-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mq-pinctrl.yaml new file mode 100644 index 000000000000..b30c704fcfa1 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mq-pinctrl.yaml @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/fsl,imx8mq-pinctrl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale IMX8MQ IOMUX Controller + +maintainers: + - Anson Huang + +description: + Please refer to fsl,imx-pinctrl.txt and pinctrl-bindings.txt in this directory + for common binding part and usage. + +properties: + compatible: + const: fsl,imx8mq-iomuxc + + reg: + maxItems: 1 + +# Client device subnode's properties +patternProperties: + 'grp$': + type: object + description: + Pinctrl node's client devices use subnodes for desired pin configuration. + Client device subnodes use below standard properties. + + properties: + fsl,pins: + description: + each entry consists of 6 integers and represents the mux and config + setting for one pin. The first 5 integers are specified using a PIN_FUNC_ID macro, which can + be found in . The last + integer CONFIG is the pad setting value like pull-up on this pin. Please + refer to i.MX8M Quad Reference Manual for detailed CONFIG settings. + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-matrix + - items: + items: + - description: | + "mux_reg" indicates the offset of mux register. + - description: | + "conf_reg" indicates the offset of pad configuration register. + - description: | + "input_reg" indicates the offset of select input register. + - description: | + "mux_val" indicates the mux value to be applied. + - description: | + "input_val" indicates the select input value to be applied. + - description: | + "pad_setting" indicates the pad configuration value to be applied. + + required: + - fsl,pins + + additionalProperties: false + +required: + - compatible + - reg + +additionalProperties: false + +examples: + # Pinmux controller node + - | + iomuxc: pinctrl@30330000 { + compatible = "fsl,imx8mq-iomuxc"; + reg = <0x30330000 0x10000>; + + pinctrl_uart1: uart1grp { + fsl,pins = + <0x234 0x49C 0x4F4 0x0 0x0 0x49>, + <0x238 0x4A0 0x4F4 0x0 0x0 0x49>; + }; + }; + +... -- cgit v1.2.3 From 03b4154183a2dbecfb6db8ba0bb684a42c7bec6b Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 18 Feb 2020 15:51:38 +0800 Subject: dt-bindings: pinctrl: Convert i.MX8MM to json-schema Convert the i.MX8MM pinctrl binding to DT schema format using json-schema Signed-off-by: Anson Huang Link: https://lore.kernel.org/r/1582012300-30260-2-git-send-email-Anson.Huang@nxp.com Reviewed-by: Rob Herring Signed-off-by: Linus Walleij --- .../bindings/pinctrl/fsl,imx8mm-pinctrl.txt | 36 ---------- .../bindings/pinctrl/fsl,imx8mm-pinctrl.yaml | 82 ++++++++++++++++++++++ 2 files changed, 82 insertions(+), 36 deletions(-) delete mode 100644 Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.txt create mode 100644 Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.txt deleted file mode 100644 index e4e01c05cf83..000000000000 --- a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.txt +++ /dev/null @@ -1,36 +0,0 @@ -* Freescale IMX8MM IOMUX Controller - -Please refer to fsl,imx-pinctrl.txt and pinctrl-bindings.txt in this directory -for common binding part and usage. - -Required properties: -- compatible: "fsl,imx8mm-iomuxc" -- reg: should contain the base physical address and size of the iomuxc - registers. - -Required properties in sub-nodes: -- fsl,pins: each entry consists of 6 integers and represents the mux and config - setting for one pin. The first 5 integers are specified using a PIN_FUNC_ID macro, which can be found in - . The last integer CONFIG is - the pad setting value like pull-up on this pin. Please refer to i.MX8M Mini - Reference Manual for detailed CONFIG settings. - -Examples: - -&uart1 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart1>; -}; - -iomuxc: pinctrl@30330000 { - compatible = "fsl,imx8mm-iomuxc"; - reg = <0x0 0x30330000 0x0 0x10000>; - - pinctrl_uart1: uart1grp { - fsl,pins = < - MX8MM_IOMUXC_UART1_RXD_UART1_DCE_RX 0x140 - MX8MM_IOMUXC_UART1_TXD_UART1_DCE_TX 0x140 - >; - }; -}; diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.yaml new file mode 100644 index 000000000000..d98a3866add8 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mm-pinctrl.yaml @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/fsl,imx8mm-pinctrl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale IMX8MM IOMUX Controller + +maintainers: + - Anson Huang + +description: + Please refer to fsl,imx-pinctrl.txt and pinctrl-bindings.txt in this directory + for common binding part and usage. + +properties: + compatible: + const: fsl,imx8mm-iomuxc + + reg: + maxItems: 1 + +# Client device subnode's properties +patternProperties: + 'grp$': + type: object + description: + Pinctrl node's client devices use subnodes for desired pin configuration. + Client device subnodes use below standard properties. + + properties: + fsl,pins: + description: + each entry consists of 6 integers and represents the mux and config + setting for one pin. The first 5 integers are specified using a PIN_FUNC_ID macro, which can + be found in . The last + integer CONFIG is the pad setting value like pull-up on this pin. Please + refer to i.MX8M Mini Reference Manual for detailed CONFIG settings. + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-matrix + - items: + items: + - description: | + "mux_reg" indicates the offset of mux register. + - description: | + "conf_reg" indicates the offset of pad configuration register. + - description: | + "input_reg" indicates the offset of select input register. + - description: | + "mux_val" indicates the mux value to be applied. + - description: | + "input_val" indicates the select input value to be applied. + - description: | + "pad_setting" indicates the pad configuration value to be applied. + + required: + - fsl,pins + + additionalProperties: false + +required: + - compatible + - reg + +additionalProperties: false + +examples: + # Pinmux controller node + - | + iomuxc: pinctrl@30330000 { + compatible = "fsl,imx8mm-iomuxc"; + reg = <0x30330000 0x10000>; + + pinctrl_uart2: uart2grp { + fsl,pins = + <0x23C 0x4A4 0x4FC 0x0 0x0 0x140>, + <0x240 0x4A8 0x000 0x0 0x0 0x140>; + }; + }; + +... -- cgit v1.2.3 From f4a776f752669af78bf80f4bbc57285b0ae732b6 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 18 Feb 2020 15:51:39 +0800 Subject: dt-bindings: pinctrl: Convert i.MX8MN to json-schema Convert the i.MX8MN pinctrl binding to DT schema format using json-schema Signed-off-by: Anson Huang Link: https://lore.kernel.org/r/1582012300-30260-3-git-send-email-Anson.Huang@nxp.com Reviewed-by: Rob Herring Signed-off-by: Linus Walleij --- .../bindings/pinctrl/fsl,imx8mn-pinctrl.txt | 39 ---------- .../bindings/pinctrl/fsl,imx8mn-pinctrl.yaml | 82 ++++++++++++++++++++++ 2 files changed, 82 insertions(+), 39 deletions(-) delete mode 100644 Documentation/devicetree/bindings/pinctrl/fsl,imx8mn-pinctrl.txt create mode 100644 Documentation/devicetree/bindings/pinctrl/fsl,imx8mn-pinctrl.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mn-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mn-pinctrl.txt deleted file mode 100644 index 330716c971b9..000000000000 --- a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mn-pinctrl.txt +++ /dev/null @@ -1,39 +0,0 @@ -* Freescale IMX8MN IOMUX Controller - -Please refer to fsl,imx-pinctrl.txt and pinctrl-bindings.txt in this directory -for common binding part and usage. - -Required properties: -- compatible: "fsl,imx8mn-iomuxc" -- reg: should contain the base physical address and size of the iomuxc - registers. - -Required properties in sub-nodes: -- fsl,pins: each entry consists of 6 integers and represents the mux and config - setting for one pin. The first 5 integers are specified using a PIN_FUNC_ID macro, which can be found in - . The last integer CONFIG is - the pad setting value like pull-up on this pin. Please refer to i.MX8M Nano - Reference Manual for detailed CONFIG settings. - -Examples: - -&uart1 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart1>; -}; - -iomuxc: pinctrl@30330000 { - compatible = "fsl,imx8mn-iomuxc"; - reg = <0x0 0x30330000 0x0 0x10000>; - - pinctrl_uart1: uart1grp { - fsl,pins = < - MX8MN_IOMUXC_UART1_RXD_UART1_DCE_RX 0x140 - MX8MN_IOMUXC_UART1_TXD_UART1_DCE_TX 0x140 - MX8MN_IOMUXC_UART3_RXD_UART1_DCE_CTS_B 0x140 - MX8MN_IOMUXC_UART3_TXD_UART1_DCE_RTS_B 0x140 - MX8MN_IOMUXC_SD1_DATA4_GPIO2_IO6 0x19 - >; - }; -}; diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mn-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mn-pinctrl.yaml new file mode 100644 index 000000000000..b9aa180e07e4 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mn-pinctrl.yaml @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/fsl,imx8mn-pinctrl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Freescale IMX8MN IOMUX Controller + +maintainers: + - Anson Huang + +description: + Please refer to fsl,imx-pinctrl.txt and pinctrl-bindings.txt in this directory + for common binding part and usage. + +properties: + compatible: + const: fsl,imx8mn-iomuxc + + reg: + maxItems: 1 + +# Client device subnode's properties +patternProperties: + 'grp$': + type: object + description: + Pinctrl node's client devices use subnodes for desired pin configuration. + Client device subnodes use below standard properties. + + properties: + fsl,pins: + description: + each entry consists of 6 integers and represents the mux and config + setting for one pin. The first 5 integers are specified using a PIN_FUNC_ID macro, which can + be found in . The last + integer CONFIG is the pad setting value like pull-up on this pin. Please + refer to i.MX8M Nano Reference Manual for detailed CONFIG settings. + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-matrix + - items: + items: + - description: | + "mux_reg" indicates the offset of mux register. + - description: | + "conf_reg" indicates the offset of pad configuration register. + - description: | + "input_reg" indicates the offset of select input register. + - description: | + "mux_val" indicates the mux value to be applied. + - description: | + "input_val" indicates the select input value to be applied. + - description: | + "pad_setting" indicates the pad configuration value to be applied. + + required: + - fsl,pins + + additionalProperties: false + +required: + - compatible + - reg + +additionalProperties: false + +examples: + # Pinmux controller node + - | + iomuxc: pinctrl@30330000 { + compatible = "fsl,imx8mn-iomuxc"; + reg = <0x30330000 0x10000>; + + pinctrl_uart2: uart2grp { + fsl,pins = + <0x23C 0x4A4 0x4FC 0x0 0x0 0x140>, + <0x240 0x4A8 0x000 0x0 0x0 0x140>; + }; + }; + +... -- cgit v1.2.3 From 61bccd918c83f275d24c4f2eaab1830ad3a1c583 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 18 Feb 2020 15:51:40 +0800 Subject: dt-bindings: pinctrl: imx8mp: Replace the uint32-array with uint32-matrix The items of mux_reg/conf_reg/input_reg/mux_val/input_val/pad_setting should be uint32-matrix instead of uint32-array, fix it and improve the schema and example. Signed-off-by: Anson Huang Link: https://lore.kernel.org/r/1582012300-30260-4-git-send-email-Anson.Huang@nxp.com Reviewed-by: Rob Herring Signed-off-by: Linus Walleij --- .../bindings/pinctrl/fsl,imx8mp-pinctrl.yaml | 25 ++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mp-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mp-pinctrl.yaml index 2e31e120395e..6297e78418cf 100644 --- a/Documentation/devicetree/bindings/pinctrl/fsl,imx8mp-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx8mp-pinctrl.yaml @@ -30,8 +30,6 @@ patternProperties: properties: fsl,pins: - allOf: - - $ref: /schemas/types.yaml#/definitions/uint32-array description: each entry consists of 6 integers and represents the mux and config setting for one pin. The first 5 integers . The last integer CONFIG is the pad setting value like pull-up on this pin. Please refer to i.MX8M Plus Reference Manual for detailed CONFIG settings. + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-matrix + - items: + items: + - description: | + "mux_reg" indicates the offset of mux register. + - description: | + "conf_reg" indicates the offset of pad configuration register. + - description: | + "input_reg" indicates the offset of select input register. + - description: | + "mux_val" indicates the mux value to be applied. + - description: | + "input_val" indicates the select input value to be applied. + - description: | + "pad_setting" indicates the pad configuration value to be applied. required: - fsl,pins @@ -59,10 +73,9 @@ examples: reg = <0x30330000 0x10000>; pinctrl_uart2: uart2grp { - fsl,pins = < - 0x228 0x488 0x5F0 0x0 0x6 0x49 - 0x228 0x488 0x000 0x0 0x0 0x49 - >; + fsl,pins = + <0x228 0x488 0x5F0 0x0 0x6 0x49>, + <0x228 0x488 0x000 0x0 0x0 0x49>; }; }; -- cgit v1.2.3 From f1d97dd3f38ba62fb8174cb7583870e17aef820d Mon Sep 17 00:00:00 2001 From: Ilias Apalodimas Date: Fri, 21 Feb 2020 11:15:19 +0200 Subject: net: page_pool: Add documentation on page_pool API Add documentation explaining the basic functionality and design principles of the API Signed-off-by: Ilias Apalodimas Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- Documentation/networking/page_pool.rst | 159 +++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 Documentation/networking/page_pool.rst (limited to 'Documentation') diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst new file mode 100644 index 000000000000..43088ddf95e4 --- /dev/null +++ b/Documentation/networking/page_pool.rst @@ -0,0 +1,159 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +Page Pool API +============= + +The page_pool allocator is optimized for the XDP mode that uses one frame +per-page, but it can fallback on the regular page allocator APIs. + +Basic use involves replacing alloc_pages() calls with the +page_pool_alloc_pages() call. Drivers should use page_pool_dev_alloc_pages() +replacing dev_alloc_pages(). + +API keeps track of inflight pages, in order to let API user know +when it is safe to free a page_pool object. Thus, API users +must run page_pool_release_page() when a page is leaving the page_pool or +call page_pool_put_page() where appropriate in order to maintain correct +accounting. + +API user must call page_pool_put_page() once on a page, as it +will either recycle the page, or in case of refcnt > 1, it will +release the DMA mapping and inflight state accounting. + +Architecture overview +===================== + +.. code-block:: none + + +------------------+ + | Driver | + +------------------+ + ^ + | + | + | + v + +--------------------------------------------+ + | request memory | + +--------------------------------------------+ + ^ ^ + | | + | Pool empty | Pool has entries + | | + v v + +-----------------------+ +------------------------+ + | alloc (and map) pages | | get page from cache | + +-----------------------+ +------------------------+ + ^ ^ + | | + | cache available | No entries, refill + | | from ptr-ring + | | + v v + +-----------------+ +------------------+ + | Fast cache | | ptr-ring cache | + +-----------------+ +------------------+ + +API interface +============= +The number of pools created **must** match the number of hardware queues +unless hardware restrictions make that impossible. This would otherwise beat the +purpose of page pool, which is allocate pages fast from cache without locking. +This lockless guarantee naturally comes from running under a NAPI softirq. +The protection doesn't strictly have to be NAPI, any guarantee that allocating +a page will cause no race conditions is enough. + +* page_pool_create(): Create a pool. + * flags: PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV + * order: 2^order pages on allocation + * pool_size: size of the ptr_ring + * nid: preferred NUMA node for allocation + * dev: struct device. Used on DMA operations + * dma_dir: DMA direction + * max_len: max DMA sync memory size + * offset: DMA address offset + +* page_pool_put_page(): The outcome of this depends on the page refcnt. If the + driver bumps the refcnt > 1 this will unmap the page. If the page refcnt is 1 + the allocator owns the page and will try to recycle it in one of the pool + caches. If PP_FLAG_DMA_SYNC_DEV is set, the page will be synced for_device + using dma_sync_single_range_for_device(). + +* page_pool_put_full_page(): Similar to page_pool_put_page(), but will DMA sync + for the entire memory area configured in area pool->max_len. + +* page_pool_recycle_direct(): Similar to page_pool_put_full_page() but caller + must guarantee safe context (e.g NAPI), since it will recycle the page + directly into the pool fast cache. + +* page_pool_release_page(): Unmap the page (if mapped) and account for it on + inflight counters. + +* page_pool_dev_alloc_pages(): Get a page from the page allocator or page_pool + caches. + +* page_pool_get_dma_addr(): Retrieve the stored DMA address. + +* page_pool_get_dma_dir(): Retrieve the stored DMA direction. + +Coding examples +=============== + +Registration +------------ + +.. code-block:: c + + /* Page pool registration */ + struct page_pool_params pp_params = { 0 }; + struct xdp_rxq_info xdp_rxq; + int err; + + pp_params.order = 0; + /* internal DMA mapping in page_pool */ + pp_params.flags = PP_FLAG_DMA_MAP; + pp_params.pool_size = DESC_NUM; + pp_params.nid = NUMA_NO_NODE; + pp_params.dev = priv->dev; + pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + page_pool = page_pool_create(&pp_params); + + err = xdp_rxq_info_reg(&xdp_rxq, ndev, 0); + if (err) + goto err_out; + + err = xdp_rxq_info_reg_mem_model(&xdp_rxq, MEM_TYPE_PAGE_POOL, page_pool); + if (err) + goto err_out; + +NAPI poller +----------- + + +.. code-block:: c + + /* NAPI Rx poller */ + enum dma_data_direction dma_dir; + + dma_dir = page_pool_get_dma_dir(dring->page_pool); + while (done < budget) { + if (some error) + page_pool_recycle_direct(page_pool, page); + if (packet_is_xdp) { + if XDP_DROP: + page_pool_recycle_direct(page_pool, page); + } else (packet_is_skb) { + page_pool_release_page(page_pool, page); + new_page = page_pool_dev_alloc_pages(page_pool); + } + } + +Driver unload +------------- + +.. code-block:: c + + /* Driver unload */ + page_pool_put_full_page(page_pool, page, false); + xdp_rxq_info_unreg(&xdp_rxq); -- cgit v1.2.3 From bd56e593da19de22284951c33ce5c419258171bf Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 21 Feb 2020 16:36:05 +0100 Subject: ASoC: meson: g12a: add toacodec dt-binding documentation Add the DT bindings and documentation of the internal audio DAC glue found on Amlogic g12a and sm1 SoC families Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20200221153607.1585499-2-jbrunet@baylibre.com Signed-off-by: Mark Brown --- .../bindings/sound/amlogic,g12a-toacodec.yaml | 51 ++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 Documentation/devicetree/bindings/sound/amlogic,g12a-toacodec.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/sound/amlogic,g12a-toacodec.yaml b/Documentation/devicetree/bindings/sound/amlogic,g12a-toacodec.yaml new file mode 100644 index 000000000000..f778d3371fde --- /dev/null +++ b/Documentation/devicetree/bindings/sound/amlogic,g12a-toacodec.yaml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/sound/amlogic,g12a-toacodec.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Amlogic G12a Internal DAC Control Glue + +maintainers: + - Jerome Brunet + +properties: + $nodename: + pattern: "^audio-controller@.*" + + "#sound-dai-cells": + const: 1 + + compatible: + oneOf: + - items: + - const: + amlogic,g12a-toacodec + - items: + - enum: + - amlogic,sm1-toacodec + - const: + amlogic,g12a-toacodec + + reg: + maxItems: 1 + + resets: + maxItems: 1 + +required: + - "#sound-dai-cells" + - compatible + - reg + - resets + +examples: + - | + #include + + toacodec: audio-controller@740 { + compatible = "amlogic,g12a-toacodec"; + reg = <0x0 0x740 0x0 0x4>; + #sound-dai-cells = <1>; + resets = <&clkc_audio AUD_RESET_TOACODEC>; + }; -- cgit v1.2.3 From aa017ab97a223d55f4955e2864daf7a05ba1f8a2 Mon Sep 17 00:00:00 2001 From: Kenneth Lee Date: Tue, 11 Feb 2020 15:54:22 +0800 Subject: uacce: Add documents for uacce Uacce (Unified/User-space-access-intended Accelerator Framework) is a kernel module targets to provide Shared Virtual Addressing (SVA) between the accelerator and process. This patch add document to explain how it works. Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jonathan Cameron Signed-off-by: Kenneth Lee Signed-off-by: Zaibo Xu Signed-off-by: Zhou Wang Signed-off-by: Zhangfei Gao Signed-off-by: Herbert Xu --- Documentation/misc-devices/uacce.rst | 176 +++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 Documentation/misc-devices/uacce.rst (limited to 'Documentation') diff --git a/Documentation/misc-devices/uacce.rst b/Documentation/misc-devices/uacce.rst new file mode 100644 index 000000000000..1db412e9b1a3 --- /dev/null +++ b/Documentation/misc-devices/uacce.rst @@ -0,0 +1,176 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Introduction of Uacce +--------------------- + +Uacce (Unified/User-space-access-intended Accelerator Framework) targets to +provide Shared Virtual Addressing (SVA) between accelerators and processes. +So accelerator can access any data structure of the main cpu. +This differs from the data sharing between cpu and io device, which share +only data content rather than address. +Because of the unified address, hardware and user space of process can +share the same virtual address in the communication. +Uacce takes the hardware accelerator as a heterogeneous processor, while +IOMMU share the same CPU page tables and as a result the same translation +from va to pa. + +:: + + __________________________ __________________________ + | | | | + | User application (CPU) | | Hardware Accelerator | + |__________________________| |__________________________| + + | | + | va | va + V V + __________ __________ + | | | | + | MMU | | IOMMU | + |__________| |__________| + | | + | | + V pa V pa + _______________________________________ + | | + | Memory | + |_______________________________________| + + + +Architecture +------------ + +Uacce is the kernel module, taking charge of iommu and address sharing. +The user drivers and libraries are called WarpDrive. + +The uacce device, built around the IOMMU SVA API, can access multiple +address spaces, including the one without PASID. + +A virtual concept, queue, is used for the communication. It provides a +FIFO-like interface. And it maintains a unified address space between the +application and all involved hardware. + +:: + + ___________________ ________________ + | | user API | | + | WarpDrive library | ------------> | user driver | + |___________________| |________________| + | | + | | + | queue fd | + | | + | | + v | + ___________________ _________ | + | | | | | mmap memory + | Other framework | | uacce | | r/w interface + | crypto/nic/others | |_________| | + |___________________| | + | | | + | register | register | + | | | + | | | + | _________________ __________ | + | | | | | | + ------------- | Device Driver | | IOMMU | | + |_________________| |__________| | + | | + | V + | ___________________ + | | | + -------------------------- | Device(Hardware) | + |___________________| + + +How does it work +---------------- + +Uacce uses mmap and IOMMU to play the trick. + +Uacce creates a chrdev for every device registered to it. New queue is +created when user application open the chrdev. The file descriptor is used +as the user handle of the queue. +The accelerator device present itself as an Uacce object, which exports as +a chrdev to the user space. The user application communicates with the +hardware by ioctl (as control path) or share memory (as data path). + +The control path to the hardware is via file operation, while data path is +via mmap space of the queue fd. + +The queue file address space: + +:: + + /** + * enum uacce_qfrt: qfrt type + * @UACCE_QFRT_MMIO: device mmio region + * @UACCE_QFRT_DUS: device user share region + */ + enum uacce_qfrt { + UACCE_QFRT_MMIO = 0, + UACCE_QFRT_DUS = 1, + }; + +All regions are optional and differ from device type to type. +Each region can be mmapped only once, otherwise -EEXIST returns. + +The device mmio region is mapped to the hardware mmio space. It is generally +used for doorbell or other notification to the hardware. It is not fast enough +as data channel. + +The device user share region is used for share data buffer between user process +and device. + + +The Uacce register API +---------------------- + +The register API is defined in uacce.h. + +:: + + struct uacce_interface { + char name[UACCE_MAX_NAME_SIZE]; + unsigned int flags; + const struct uacce_ops *ops; + }; + +According to the IOMMU capability, uacce_interface flags can be: + +:: + + /** + * UACCE Device flags: + * UACCE_DEV_SVA: Shared Virtual Addresses + * Support PASID + * Support device page faults (PCI PRI or SMMU Stall) + */ + #define UACCE_DEV_SVA BIT(0) + + struct uacce_device *uacce_alloc(struct device *parent, + struct uacce_interface *interface); + int uacce_register(struct uacce_device *uacce); + void uacce_remove(struct uacce_device *uacce); + +uacce_register results can be: + +a. If uacce module is not compiled, ERR_PTR(-ENODEV) + +b. Succeed with the desired flags + +c. Succeed with the negotiated flags, for example + + uacce_interface.flags = UACCE_DEV_SVA but uacce->flags = ~UACCE_DEV_SVA + + So user driver need check return value as well as the negotiated uacce->flags. + + +The user driver +--------------- + +The queue file mmap space will need a user driver to wrap the communication +protocol. Uacce provides some attributes in sysfs for the user driver to +match the right accelerator accordingly. +More details in Documentation/ABI/testing/sysfs-driver-uacce. -- cgit v1.2.3 From 015d239ac0142ad0e26567fd890ef8d171f13709 Mon Sep 17 00:00:00 2001 From: Kenneth Lee Date: Tue, 11 Feb 2020 15:54:23 +0800 Subject: uacce: add uacce driver Uacce (Unified/User-space-access-intended Accelerator Framework) targets to provide Shared Virtual Addressing (SVA) between accelerators and processes. So accelerator can access any data structure of the main cpu. This differs from the data sharing between cpu and io device, which share only data content rather than address. Since unified address, hardware and user space of process can share the same virtual address in the communication. Uacce create a chrdev for every registration, the queue is allocated to the process when the chrdev is opened. Then the process can access the hardware resource by interact with the queue file. By mmap the queue file space to user space, the process can directly put requests to the hardware without syscall to the kernel space. The IOMMU core only tracks mm<->device bonds at the moment, because it only needs to handle IOTLB invalidation and PASID table entries. However uacce needs a finer granularity since multiple queues from the same device can be bound to an mm. When the mm exits, all bound queues must be stopped so that the IOMMU can safely clear the PASID table entry and reallocate the PASID. An intermediate struct uacce_mm links uacce devices and queues. Note that an mm may be bound to multiple devices but an uacce_mm structure only ever belongs to a single device, because we don't need anything more complex (if multiple devices are bound to one mm, then we'll create one uacce_mm for each bond). uacce_device --+-- uacce_mm --+-- uacce_queue | '-- uacce_queue | '-- uacce_mm --+-- uacce_queue +-- uacce_queue '-- uacce_queue Reviewed-by: Greg Kroah-Hartman Reviewed-by: Jonathan Cameron Signed-off-by: Kenneth Lee Signed-off-by: Zaibo Xu Signed-off-by: Zhou Wang Signed-off-by: Jean-Philippe Brucker Signed-off-by: Zhangfei Gao Signed-off-by: Herbert Xu --- Documentation/ABI/testing/sysfs-driver-uacce | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-driver-uacce (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-driver-uacce b/Documentation/ABI/testing/sysfs-driver-uacce new file mode 100644 index 000000000000..08f2591138af --- /dev/null +++ b/Documentation/ABI/testing/sysfs-driver-uacce @@ -0,0 +1,39 @@ +What: /sys/class/uacce//api +Date: Feb 2020 +KernelVersion: 5.7 +Contact: linux-accelerators@lists.ozlabs.org +Description: Api of the device + Can be any string and up to userspace to parse. + Application use the api to match the correct driver + +What: /sys/class/uacce//flags +Date: Feb 2020 +KernelVersion: 5.7 +Contact: linux-accelerators@lists.ozlabs.org +Description: Attributes of the device, see UACCE_DEV_xxx flag defined in uacce.h + +What: /sys/class/uacce//available_instances +Date: Feb 2020 +KernelVersion: 5.7 +Contact: linux-accelerators@lists.ozlabs.org +Description: Available instances left of the device + Return -ENODEV if uacce_ops get_available_instances is not provided + +What: /sys/class/uacce//algorithms +Date: Feb 2020 +KernelVersion: 5.7 +Contact: linux-accelerators@lists.ozlabs.org +Description: Algorithms supported by this accelerator, separated by new line. + Can be any string and up to userspace to parse. + +What: /sys/class/uacce//region_mmio_size +Date: Feb 2020 +KernelVersion: 5.7 +Contact: linux-accelerators@lists.ozlabs.org +Description: Size (bytes) of mmio region queue file + +What: /sys/class/uacce//region_dus_size +Date: Feb 2020 +KernelVersion: 5.7 +Contact: linux-accelerators@lists.ozlabs.org +Description: Size (bytes) of dus region queue file -- cgit v1.2.3 From 67a6af7ad1d161dbc9c139e868d5549e632923f7 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Sun, 2 Feb 2020 12:13:47 -0500 Subject: x86/boot: Remove KEEP_SEGMENTS support Commit a24e785111a3 ("i386: paravirt boot sequence") added this flag for use by paravirtualized environments such as Xen. However, Xen never made use of this flag [1], and it was only ever used by lguest [2]. Commit ecda85e70277 ("x86/lguest: Remove lguest support") removed lguest, so KEEP_SEGMENTS has lost its last user. [1] https://lore.kernel.org/lkml/4D4B097C.5050405@goop.org [2] https://www.mail-archive.com/lguest@lists.ozlabs.org/msg00469.html Signed-off-by: Arvind Sankar Link: https://lore.kernel.org/r/20200202171353.3736319-2-nivedita@alum.mit.edu Signed-off-by: Ard Biesheuvel --- Documentation/x86/boot.rst | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst index c9c201596c3e..fa7ddc0428c8 100644 --- a/Documentation/x86/boot.rst +++ b/Documentation/x86/boot.rst @@ -490,15 +490,11 @@ Protocol: 2.00+ kernel) to not write early messages that require accessing the display hardware directly. - Bit 6 (write): KEEP_SEGMENTS + Bit 6 (obsolete): KEEP_SEGMENTS Protocol: 2.07+ - - If 0, reload the segment registers in the 32bit entry point. - - If 1, do not reload the segment registers in the 32bit entry point. - - Assume that %cs %ds %ss %es are all set to flat segments with - a base of 0 (or the equivalent for their environment). + - This flag is obsolete. Bit 7 (write): CAN_USE_HEAP -- cgit v1.2.3 From 795d392e4abe685a38d367f7fe64ef3a46fb15ac Mon Sep 17 00:00:00 2001 From: Oleksandr Suvorov Date: Wed, 19 Feb 2020 13:00:52 +0000 Subject: dt-bindings: arm: fsl: add nxp based toradex colibri-imx7 bindings Document the NXP SoC based Toradex Colibri iMX7S/D module and the Aster carrier board devicetree bindings. Signed-off-by: Oleksandr Suvorov Signed-off-by: Shawn Guo --- Documentation/devicetree/bindings/arm/fsl.yaml | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml index e654a6376bc4..239ac2c31f49 100644 --- a/Documentation/devicetree/bindings/arm/fsl.yaml +++ b/Documentation/devicetree/bindings/arm/fsl.yaml @@ -274,6 +274,7 @@ properties: items: - enum: - toradex,colibri-imx7s # Colibri iMX7 Solo Module + - toradex,colibri-imx7s-aster # Colibri iMX7 Solo Module on Aster Carrier Board - toradex,colibri-imx7s-eval-v3 # Colibri iMX7 Solo Module on Colibri Evaluation Board V3 - tq,imx7s-mba7 # i.MX7S TQ MBa7 with TQMa7S SoM - const: fsl,imx7s @@ -285,7 +286,9 @@ properties: - fsl,imx7d-sdb-reva # i.MX7 SabreSD Rev-A Board - novtech,imx7d-meerkat96 # i.MX7 Meerkat96 Board - toradex,colibri-imx7d # Colibri iMX7 Dual Module + - toradex,colibri-imx7d-aster # Colibri iMX7 Dual Module on Aster Carrier Board - toradex,colibri-imx7d-emmc # Colibri iMX7 Dual 1GB (eMMC) Module + - toradex,colibri-imx7d-emmc-aster # Colibri iMX7 Dual 1GB (eMMC) Module on Aster Carrier Board - toradex,colibri-imx7d-emmc-eval-v3 # Colibri iMX7 Dual 1GB (eMMC) Module on Colibri Evaluation Board V3 - toradex,colibri-imx7d-eval-v3 # Colibri iMX7 Dual Module on Colibri Evaluation Board V3 - tq,imx7d-mba7 # i.MX7D TQ MBa7 with TQMa7D SoM -- cgit v1.2.3 From 7efbada45edc660d59cee2842726b5bb1a8b0322 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sun, 23 Feb 2020 04:16:13 +0100 Subject: dt-bindings: arm: sunxi: Add PocketBook Touch Lux 3 Add a new board name. Signed-off-by: Ondrej Jirman Signed-off-by: Maxime Ripard --- Documentation/devicetree/bindings/arm/sunxi.yaml | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml index 1e63c9867749..5b22b77e4bb7 100644 --- a/Documentation/devicetree/bindings/arm/sunxi.yaml +++ b/Documentation/devicetree/bindings/arm/sunxi.yaml @@ -658,6 +658,11 @@ properties: - const: pineriver,mini-xplus - const: allwinner,sun4i-a10 + - description: PocketBook Touch Lux 3 + items: + - const: pocketbook,touch-lux-3 + - const: allwinner,sun5i-a13 + - description: Point of View Protab2-IPS9 items: - const: pov,protab2-ips9 -- cgit v1.2.3 From 297c5ac328066a753699b2beddcc31a98acb366e Mon Sep 17 00:00:00 2001 From: Jae Hyun Yoo Date: Tue, 7 Jan 2020 02:15:02 +0100 Subject: media: Documentation: dt-bindings: media: add AST2600 Video Engine support The AST2600 has Video Engine so add the compatible string into the document. Signed-off-by: Jae Hyun Yoo Acked-by: Joel Stanley Acked-by: Rob Herring Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/devicetree/bindings/media/aspeed-video.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/aspeed-video.txt b/Documentation/devicetree/bindings/media/aspeed-video.txt index ce2894506e1f..d2ca32512272 100644 --- a/Documentation/devicetree/bindings/media/aspeed-video.txt +++ b/Documentation/devicetree/bindings/media/aspeed-video.txt @@ -1,11 +1,12 @@ * Device tree bindings for Aspeed Video Engine -The Video Engine (VE) embedded in the Aspeed AST2400 and AST2500 SOCs can +The Video Engine (VE) embedded in the Aspeed AST2400/2500/2600 SOCs can capture and compress video data from digital or analog sources. Required properties: - compatible: "aspeed,ast2400-video-engine" or - "aspeed,ast2500-video-engine" + "aspeed,ast2500-video-engine" or + "aspeed,ast2600-video-engine" - reg: contains the offset and length of the VE memory region - clocks: clock specifiers for the syscon clocks associated with the VE (ordering must match the clock-names property) -- cgit v1.2.3 From 25f98947ced09b3528f52c983960847a31a0b014 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 15 Jan 2020 18:32:54 +0100 Subject: media: vidioc-queryctrl.rst: fix error code An attempt to get a BUTTON control returns EACCES, not EINVAL. BUTTON controls have V4L2_CTRL_FLAG_WRITE_ONLY set, and the documentation for that flag correctly says that getting a write-only control returns EACCES. It's the description of the BUTTON type that's wrong, so fix that so they are consistent. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/v4l/vidioc-queryctrl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/media/uapi/v4l/vidioc-queryctrl.rst b/Documentation/media/uapi/v4l/vidioc-queryctrl.rst index 6690928e657b..22ff3c6abd9c 100644 --- a/Documentation/media/uapi/v4l/vidioc-queryctrl.rst +++ b/Documentation/media/uapi/v4l/vidioc-queryctrl.rst @@ -378,7 +378,7 @@ See also the examples in :ref:`control`. - 0 - 0 - A control which performs an action when set. Drivers must ignore - the value passed with ``VIDIOC_S_CTRL`` and return an ``EINVAL`` error + the value passed with ``VIDIOC_S_CTRL`` and return an ``EACCES`` error code on a ``VIDIOC_G_CTRL`` attempt. * - ``V4L2_CTRL_TYPE_INTEGER64`` - any -- cgit v1.2.3 From 238e4a5baa361256ae1641ad9455bb2bb359273f Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 3 Feb 2020 12:41:09 +0100 Subject: media: rename VFL_TYPE_GRABBER to _VIDEO We currently have the following devnode types: enum vfl_devnode_type { VFL_TYPE_GRABBER = 0, VFL_TYPE_VBI, VFL_TYPE_RADIO, VFL_TYPE_SUBDEV, VFL_TYPE_SDR, VFL_TYPE_TOUCH, VFL_TYPE_MAX /* Shall be the last one */ }; They all make sense, except for the first: GRABBER really refers to /dev/videoX devices, which can be capture, output or m2m, so 'grabber' doesn't even refer to their function anymore. Let's call a spade a spade and rename this to VFL_TYPE_VIDEO. Signed-off-by: Hans Verkuil Acked-by: Sakari Ailus Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/kapi/v4l2-dev.rst | 4 ++-- Documentation/translations/zh_CN/video4linux/v4l2-framework.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/media/kapi/v4l2-dev.rst b/Documentation/media/kapi/v4l2-dev.rst index 4c5a15c53dbf..63c064837c00 100644 --- a/Documentation/media/kapi/v4l2-dev.rst +++ b/Documentation/media/kapi/v4l2-dev.rst @@ -185,7 +185,7 @@ This will create the character device for you. .. code-block:: c - err = video_register_device(vdev, VFL_TYPE_GRABBER, -1); + err = video_register_device(vdev, VFL_TYPE_VIDEO, -1); if (err) { video_device_release(vdev); /* or kfree(my_vdev); */ return err; @@ -201,7 +201,7 @@ types exist: ========================== ==================== ============================== :c:type:`vfl_devnode_type` Device name Usage ========================== ==================== ============================== -``VFL_TYPE_GRABBER`` ``/dev/videoX`` for video input/output devices +``VFL_TYPE_VIDEO`` ``/dev/videoX`` for video input/output devices ``VFL_TYPE_VBI`` ``/dev/vbiX`` for vertical blank data (i.e. closed captions, teletext) ``VFL_TYPE_RADIO`` ``/dev/radioX`` for radio tuners diff --git a/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt b/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt index 66c7c568bd86..9c39ee58ea50 100644 --- a/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt +++ b/Documentation/translations/zh_CN/video4linux/v4l2-framework.txt @@ -649,7 +649,7 @@ video_device注册 接下来你需要注册视频设备:这会为你创建一个字符设备。 - err = video_register_device(vdev, VFL_TYPE_GRABBER, -1); + err = video_register_device(vdev, VFL_TYPE_VIDEO, -1); if (err) { video_device_release(vdev); /* or kfree(my_vdev); */ return err; @@ -660,7 +660,7 @@ video_device注册 注册哪种设备是根据类型(type)参数。存在以下类型: -VFL_TYPE_GRABBER: 用于视频输入/输出设备的 videoX +VFL_TYPE_VIDEO: 用于视频输入/输出设备的 videoX VFL_TYPE_VBI: 用于垂直消隐数据的 vbiX (例如,隐藏式字幕,图文电视) VFL_TYPE_RADIO: 用于广播调谐器的 radioX -- cgit v1.2.3 From 0c9d29eb1cceb8a155dd42769cc88b50c45f0dbe Mon Sep 17 00:00:00 2001 From: Justin Swartz Date: Mon, 3 Feb 2020 23:40:15 +0100 Subject: media: dt-bindings: Add binding for rk3228 rga Indicate that the rk3228 rga is compatible with that of the rk3288. But if any rk3228-specific quirks are identified in future that require handling logic that differs from what is provided for the rk3288, then allow for the compatibility string "rockchip,rk3228-rga" to be matched instead of "rockchip,rk3288-rga". Signed-off-by: Justin Swartz Acked-by: Rob Herring Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/devicetree/bindings/media/rockchip-rga.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/rockchip-rga.txt b/Documentation/devicetree/bindings/media/rockchip-rga.txt index fd5276abfad6..c53a8e5133f6 100644 --- a/Documentation/devicetree/bindings/media/rockchip-rga.txt +++ b/Documentation/devicetree/bindings/media/rockchip-rga.txt @@ -6,8 +6,9 @@ BitBLT, alpha blending and image blur/sharpness. Required properties: - compatible: value should be one of the following - "rockchip,rk3288-rga"; - "rockchip,rk3399-rga"; + "rockchip,rk3228-rga", "rockchip,rk3288-rga": for Rockchip RK3228 + "rockchip,rk3288-rga": for Rockchip RK3288 + "rockchip,rk3399-rga": for Rockchip RK3399 - interrupts: RGA interrupt specifier. -- cgit v1.2.3 From 218fc9f2fcfbad70e7b2111eca591b1dc8e6c8a9 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 10 Feb 2020 11:42:38 +0100 Subject: media: Documentation/media/uapi: more readable unions Avoid adding an extra columns when describing unions in the documentation. That makes it much harder to read. See e.g. VIDIOC_QUERY_EXT_CTRLS. Instead start off a union with 'union {' and end it with an extra row containing '}'. This leaves a lot more space for the description of the fields. This formatting technique was used in a few places already, but this patches fixes all remaining occurrences of 'union' in the media uAPI. Signed-off-by: Hans Verkuil Reported-by: Nicolas Dufresne Signed-off-by: Mauro Carvalho Chehab --- .../media/uapi/cec/cec-ioc-adap-g-conn-info.rst | 10 ++-- Documentation/media/uapi/cec/cec-ioc-dqevent.rst | 20 ++++---- .../uapi/mediactl/media-ioc-enum-entities.rst | 24 +++------- Documentation/media/uapi/v4l/buffer.rst | 53 +++++++-------------- Documentation/media/uapi/v4l/dev-sliced-vbi.rst | 15 +++--- .../media/uapi/v4l/pixfmt-v4l2-mplane.rst | 6 ++- Documentation/media/uapi/v4l/pixfmt-v4l2.rst | 2 - .../media/uapi/v4l/vidioc-dbg-g-chip-info.rst | 12 ++--- .../media/uapi/v4l/vidioc-dbg-g-register.rst | 12 ++--- .../media/uapi/v4l/vidioc-decoder-cmd.rst | 26 +++------- Documentation/media/uapi/v4l/vidioc-dqevent.rst | 55 +++++++--------------- .../media/uapi/v4l/vidioc-dv-timings-cap.rst | 14 +++--- .../media/uapi/v4l/vidioc-enum-frameintervals.rst | 19 +++----- .../media/uapi/v4l/vidioc-enum-framesizes.rst | 18 +++---- .../media/uapi/v4l/vidioc-g-dv-timings.rst | 16 +++---- .../media/uapi/v4l/vidioc-g-ext-ctrls.rst | 45 +++++++----------- Documentation/media/uapi/v4l/vidioc-g-fmt.rst | 29 +++++------- Documentation/media/uapi/v4l/vidioc-g-parm.rst | 18 +++---- Documentation/media/uapi/v4l/vidioc-queryctrl.rst | 19 +++----- 19 files changed, 153 insertions(+), 260 deletions(-) (limited to 'Documentation') diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-conn-info.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-conn-info.rst index a21659d55c6b..6818ddf1495c 100644 --- a/Documentation/media/uapi/cec/cec-ioc-adap-g-conn-info.rst +++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-conn-info.rst @@ -44,18 +44,18 @@ is only available if the ``CEC_CAP_CONNECTOR_INFO`` capability is set. .. flat-table:: struct cec_connector_info :header-rows: 0 :stub-columns: 0 - :widths: 1 1 1 8 + :widths: 1 1 8 * - __u32 - ``type`` - The type of connector this adapter is associated with. - * - union + * - union { - ``(anonymous)`` - - - * - - - ``struct cec_drm_connector_info`` + * - ``struct cec_drm_connector_info`` - drm - :ref:`cec-drm-connector-info` + * - } + - .. tabularcolumns:: |p{4.4cm}|p{2.5cm}|p{10.6cm}| diff --git a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst index 5e21b1fbfc01..d16b226b1bef 100644 --- a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst +++ b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst @@ -109,35 +109,33 @@ it is guaranteed that the state did change in between the two events. .. flat-table:: struct cec_event :header-rows: 0 :stub-columns: 0 - :widths: 1 1 1 8 + :widths: 1 1 8 * - __u64 - ``ts`` - - :cspan:`1`\ Timestamp of the event in ns. + - Timestamp of the event in ns. The timestamp has been taken from the ``CLOCK_MONOTONIC`` clock. To access the same clock from userspace use :c:func:`clock_gettime`. * - __u32 - ``event`` - - :cspan:`1` The CEC event type, see :ref:`cec-events`. + - The CEC event type, see :ref:`cec-events`. * - __u32 - ``flags`` - - :cspan:`1` Event flags, see :ref:`cec-event-flags`. - * - union + - Event flags, see :ref:`cec-event-flags`. + * - union { - (anonymous) - - - - - * - - - struct cec_event_state_change + * - struct cec_event_state_change - ``state_change`` - The new adapter state as sent by the :ref:`CEC_EVENT_STATE_CHANGE ` event. - * - - - struct cec_event_lost_msgs + * - struct cec_event_lost_msgs - ``lost_msgs`` - The number of lost messages as sent by the :ref:`CEC_EVENT_LOST_MSGS ` event. + * - } + - .. tabularcolumns:: |p{5.6cm}|p{0.9cm}|p{11.0cm}| diff --git a/Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst b/Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst index 6218d9cbdd83..33e2b110145c 100644 --- a/Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst +++ b/Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst @@ -64,12 +64,11 @@ id's until they get an error. .. flat-table:: struct media_entity_desc :header-rows: 0 :stub-columns: 0 - :widths: 1 1 1 1 8 + :widths: 2 2 1 8 * - __u32 - ``id`` - - - - Entity ID, set by the application. When the ID is or'ed with ``MEDIA_ENT_ID_FLAG_NEXT``, the driver clears the flag and returns the first entity with a larger ID. Do not expect that the ID will @@ -79,79 +78,70 @@ id's until they get an error. * - char - ``name``\ [32] - - - - Entity name as an UTF-8 NULL-terminated string. This name must be unique within the media topology. * - __u32 - ``type`` - - - - Entity type, see :ref:`media-entity-functions` for details. * - __u32 - ``revision`` - - - - Entity revision. Always zero (obsolete) * - __u32 - ``flags`` - - - - Entity flags, see :ref:`media-entity-flag` for details. * - __u32 - ``group_id`` - - - - Entity group ID. Always zero (obsolete) * - __u16 - ``pads`` - - - - Number of pads * - __u16 - ``links`` - - - - Total number of outbound links. Inbound links are not counted in this field. * - __u32 - ``reserved[4]`` - - - - Reserved for future extensions. Drivers and applications must set the array to zero. - * - union + * - union { + - (anonymous) - * - - - struct + * - struct - ``dev`` - - Valid for (sub-)devices that create a single device node. * - - - - __u32 - ``major`` - Device node major number. * - - - - __u32 - ``minor`` - Device node minor number. - * - - - __u8 + * - __u8 - ``raw``\ [184] - - + * - } + - Return Value diff --git a/Documentation/media/uapi/v4l/buffer.rst b/Documentation/media/uapi/v4l/buffer.rst index 9149b57728e5..3112300c2fa0 100644 --- a/Documentation/media/uapi/v4l/buffer.rst +++ b/Documentation/media/uapi/v4l/buffer.rst @@ -172,11 +172,10 @@ struct v4l2_buffer .. flat-table:: struct v4l2_buffer :header-rows: 0 :stub-columns: 0 - :widths: 1 2 1 10 + :widths: 1 2 10 * - __u32 - ``index`` - - - Number of the buffer, set by the application except when calling :ref:`VIDIOC_DQBUF `, then it is set by the driver. This field can range from zero to the number of buffers @@ -186,14 +185,12 @@ struct v4l2_buffer :ref:`VIDIOC_CREATE_BUFS` minus one. * - __u32 - ``type`` - - - Type of the buffer, same as struct :c:type:`v4l2_format` ``type`` or struct :c:type:`v4l2_requestbuffers` ``type``, set by the application. See :c:type:`v4l2_buf_type` * - __u32 - ``bytesused`` - - - The number of bytes occupied by the data in the buffer. It depends on the negotiated data format and may change with each buffer for compressed variable size data like JPEG images. Drivers must set @@ -205,18 +202,15 @@ struct v4l2_buffer ``planes`` pointer is used instead. * - __u32 - ``flags`` - - - Flags set by the application or driver, see :ref:`buffer-flags`. * - __u32 - ``field`` - - - Indicates the field order of the image in the buffer, see :c:type:`v4l2_field`. This field is not used when the buffer contains VBI data. Drivers must set it when ``type`` refers to a capture stream, applications when it refers to an output stream. * - struct timeval - ``timestamp`` - - - For capture streams this is time when the first data byte was captured, as returned by the :c:func:`clock_gettime()` function for the relevant clock id; see ``V4L2_BUF_FLAG_TIMESTAMP_*`` in @@ -229,7 +223,6 @@ struct v4l2_buffer stream. * - struct :c:type:`v4l2_timecode` - ``timecode`` - - - When the ``V4L2_BUF_FLAG_TIMECODE`` flag is set in ``flags``, this structure contains a frame timecode. In :c:type:`V4L2_FIELD_ALTERNATE ` mode the top and @@ -239,10 +232,9 @@ struct v4l2_buffer independent of the ``timestamp`` and ``sequence`` fields. * - __u32 - ``sequence`` - - - Set by the driver, counting the frames (not fields!) in sequence. This field is set for both input and output devices. - * - :cspan:`3` + * - :cspan:`2` In :c:type:`V4L2_FIELD_ALTERNATE ` mode the top and bottom field have the same sequence number. The count starts at @@ -262,13 +254,11 @@ struct v4l2_buffer * - __u32 - ``memory`` - - - This field must be set by applications and/or drivers in accordance with the selected I/O method. See :c:type:`v4l2_memory` - * - union + * - union { - ``m`` - * - - - __u32 + * - __u32 - ``offset`` - For the single-planar API and when ``memory`` is ``V4L2_MEMORY_MMAP`` this is the offset of the buffer from the @@ -276,29 +266,27 @@ struct v4l2_buffer and apart of serving as parameter to the :ref:`mmap() ` function not useful for applications. See :ref:`mmap` for details - * - - - unsigned long + * - unsigned long - ``userptr`` - For the single-planar API and when ``memory`` is ``V4L2_MEMORY_USERPTR`` this is a pointer to the buffer (casted to unsigned long type) in virtual memory, set by the application. See :ref:`userp` for details. - * - - - struct v4l2_plane + * - struct v4l2_plane - ``*planes`` - When using the multi-planar API, contains a userspace pointer to an array of struct :c:type:`v4l2_plane`. The size of the array should be put in the ``length`` field of this struct :c:type:`v4l2_buffer` structure. - * - - - int + * - int - ``fd`` - For the single-plane API and when ``memory`` is ``V4L2_MEMORY_DMABUF`` this is the file descriptor associated with a DMABUF buffer. + * - } + - * - __u32 - ``length`` - - - Size of the buffer (not the payload) in bytes for the single-planar API. This is set by the driver based on the calls to :ref:`VIDIOC_REQBUFS` and/or @@ -308,12 +296,10 @@ struct v4l2_buffer actual number of valid elements in that array. * - __u32 - ``reserved2`` - - - A place holder for future extensions. Drivers and applications must set this to 0. * - __u32 - ``request_fd`` - - - The file descriptor of the request to queue the buffer to. If the flag ``V4L2_BUF_FLAG_REQUEST_FD`` is set, then the buffer will be queued to this request. If the flag is not set, then this field will @@ -344,11 +330,10 @@ struct v4l2_plane .. flat-table:: :header-rows: 0 :stub-columns: 0 - :widths: 1 1 1 2 + :widths: 1 1 2 * - __u32 - ``bytesused`` - - - The number of bytes occupied by data in the plane (its payload). Drivers must set this field when ``type`` refers to a capture stream, applications when it refers to an output stream. If the @@ -362,40 +347,35 @@ struct v4l2_plane which may not be 0. * - __u32 - ``length`` - - - Size in bytes of the plane (not its payload). This is set by the driver based on the calls to :ref:`VIDIOC_REQBUFS` and/or :ref:`VIDIOC_CREATE_BUFS`. - * - union + * - union { - ``m`` - - - - - * - - - __u32 + * - __u32 - ``mem_offset`` - When the memory type in the containing struct :c:type:`v4l2_buffer` is ``V4L2_MEMORY_MMAP``, this is the value that should be passed to :ref:`mmap() `, similar to the ``offset`` field in struct :c:type:`v4l2_buffer`. - * - - - unsigned long + * - unsigned long - ``userptr`` - When the memory type in the containing struct :c:type:`v4l2_buffer` is ``V4L2_MEMORY_USERPTR``, this is a userspace pointer to the memory allocated for this plane by an application. - * - - - int + * - int - ``fd`` - When the memory type in the containing struct :c:type:`v4l2_buffer` is ``V4L2_MEMORY_DMABUF``, this is a file descriptor associated with a DMABUF buffer, similar to the ``fd`` field in struct :c:type:`v4l2_buffer`. + * - } + - * - __u32 - ``data_offset`` - - - Offset in bytes to video data in the plane. Drivers must set this field when ``type`` refers to a capture stream, applications when it refers to an output stream. @@ -407,7 +387,6 @@ struct v4l2_plane at offset ``data_offset`` from the start of the plane. * - __u32 - ``reserved[11]`` - - - Reserved for future use. Should be zeroed by drivers and applications. diff --git a/Documentation/media/uapi/v4l/dev-sliced-vbi.rst b/Documentation/media/uapi/v4l/dev-sliced-vbi.rst index e86346f66017..7b2d38dd402a 100644 --- a/Documentation/media/uapi/v4l/dev-sliced-vbi.rst +++ b/Documentation/media/uapi/v4l/dev-sliced-vbi.rst @@ -478,33 +478,30 @@ struct v4l2_mpeg_vbi_fmt_ivtv .. flat-table:: :header-rows: 0 :stub-columns: 0 - :widths: 1 1 1 2 + :widths: 1 1 2 * - __u8 - ``magic``\ [4] - - - A "magic" constant from :ref:`v4l2-mpeg-vbi-fmt-ivtv-magic` that indicates this is a valid sliced VBI data payload and also indicates which member of the anonymous union, ``itv0`` or ``ITV0``, to use for the payload data. - * - union + * - union { - (anonymous) - * - - - struct :c:type:`v4l2_mpeg_vbi_itv0` + * - struct :c:type:`v4l2_mpeg_vbi_itv0` - ``itv0`` - The primary form of the sliced VBI data payload that contains anywhere from 1 to 35 lines of sliced VBI data. Line masks are provided in this form of the payload indicating which VBI lines are provided. - * - - - struct :ref:`v4l2_mpeg_vbi_ITV0 ` + * - struct :ref:`v4l2_mpeg_vbi_ITV0 ` - ``ITV0`` - An alternate form of the sliced VBI data payload used when 36 lines of sliced VBI data are present. No line masks are provided in this form of the payload; all valid line mask bits are implcitly set. - - + * - } + - .. _v4l2-mpeg-vbi-fmt-ivtv-magic: diff --git a/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst b/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst index db43dda5aafb..054275c0dfc1 100644 --- a/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst +++ b/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst @@ -100,7 +100,8 @@ describing all planes of that format. * - __u8 - ``flags`` - Flags set by the application or driver, see :ref:`format-flags`. - * - :cspan:`2` union { (anonymous) + * - union { + - (anonymous) * - __u8 - ``ycbcr_enc`` - Y'CbCr encoding, from enum :c:type:`v4l2_ycbcr_encoding`. @@ -113,7 +114,8 @@ describing all planes of that format. This information supplements the ``colorspace`` and must be set by the driver for capture streams and by the application for output streams, see :ref:`colorspaces`. - * - :cspan:`2` } + * - } + - * - __u8 - ``quantization`` - Quantization range, from enum :c:type:`v4l2_quantization`. diff --git a/Documentation/media/uapi/v4l/pixfmt-v4l2.rst b/Documentation/media/uapi/v4l/pixfmt-v4l2.rst index a8321c348bf8..a993b861bf75 100644 --- a/Documentation/media/uapi/v4l/pixfmt-v4l2.rst +++ b/Documentation/media/uapi/v4l/pixfmt-v4l2.rst @@ -143,7 +143,6 @@ Single-planar format structure - Flags set by the application or driver, see :ref:`format-flags`. * - union { - (anonymous) - - * - __u32 - ``ycbcr_enc`` - Y'CbCr encoding, from enum :c:type:`v4l2_ycbcr_encoding`. @@ -158,7 +157,6 @@ Single-planar format structure streams, see :ref:`colorspaces`. * - } - - - * - __u32 - ``quantization`` - Quantization range, from enum :c:type:`v4l2_quantization`. diff --git a/Documentation/media/uapi/v4l/vidioc-dbg-g-chip-info.rst b/Documentation/media/uapi/v4l/vidioc-dbg-g-chip-info.rst index a1cf20181cf1..d38031dbe4e4 100644 --- a/Documentation/media/uapi/v4l/vidioc-dbg-g-chip-info.rst +++ b/Documentation/media/uapi/v4l/vidioc-dbg-g-chip-info.rst @@ -91,23 +91,23 @@ instructions. .. flat-table:: struct v4l2_dbg_match :header-rows: 0 :stub-columns: 0 - :widths: 1 1 1 2 + :widths: 1 1 2 * - __u32 - ``type`` - See :ref:`name-chip-match-types` for a list of possible types. - * - union + * - union { - (anonymous) - * - - - __u32 + * - __u32 - ``addr`` - Match a chip by this number, interpreted according to the ``type`` field. - * - - - char + * - char - ``name[32]`` - Match a chip by this name, interpreted according to the ``type`` field. Currently unused. + * - } + - diff --git a/Documentation/media/uapi/v4l/vidioc-dbg-g-register.rst b/Documentation/media/uapi/v4l/vidioc-dbg-g-register.rst index 29e1d4fc4f52..112597c6cad2 100644 --- a/Documentation/media/uapi/v4l/vidioc-dbg-g-register.rst +++ b/Documentation/media/uapi/v4l/vidioc-dbg-g-register.rst @@ -100,23 +100,23 @@ instructions. .. flat-table:: struct v4l2_dbg_match :header-rows: 0 :stub-columns: 0 - :widths: 1 1 1 2 + :widths: 1 1 2 * - __u32 - ``type`` - See :ref:`chip-match-types` for a list of possible types. - * - union + * - union { - (anonymous) - * - - - __u32 + * - __u32 - ``addr`` - Match a chip by this number, interpreted according to the ``type`` field. - * - - - char + * - char - ``name[32]`` - Match a chip by this name, interpreted according to the ``type`` field. Currently unused. + * - } + - diff --git a/Documentation/media/uapi/v4l/vidioc-decoder-cmd.rst b/Documentation/media/uapi/v4l/vidioc-decoder-cmd.rst index f1a504836f31..784c5980da8d 100644 --- a/Documentation/media/uapi/v4l/vidioc-decoder-cmd.rst +++ b/Documentation/media/uapi/v4l/vidioc-decoder-cmd.rst @@ -77,32 +77,25 @@ introduced in Linux 3.3. They are, however, mandatory for stateful mem2mem decod .. flat-table:: struct v4l2_decoder_cmd :header-rows: 0 :stub-columns: 0 - :widths: 11 24 12 16 106 + :widths: 1 1 1 3 * - __u32 - ``cmd`` - - - - The decoder command, see :ref:`decoder-cmds`. * - __u32 - ``flags`` - - - - Flags to go with the command. If no flags are defined for this command, drivers and applications must set this field to zero. - * - union + * - union { - (anonymous) - - - - - - - * - - - struct + * - struct - ``start`` - - Structure containing additional data for the ``V4L2_DEC_CMD_START`` command. * - - - - __s32 - ``speed`` - Playback speed and direction. The playback speed is defined as @@ -113,7 +106,6 @@ introduced in Linux 3.3. They are, however, mandatory for stateful mem2mem decod of 1 steps just one frame forward, a speed of -1 steps just one frame back. * - - - - __u32 - ``format`` - Format restrictions. This field is set by the driver, not the @@ -124,30 +116,26 @@ introduced in Linux 3.3. They are, however, mandatory for stateful mem2mem decod GOPs, which it can then play in reverse order. So to implement reverse playback the application must feed the decoder the last GOP in the video file, then the GOP before that, etc. etc. - * - - - struct + * - struct - ``stop`` - - Structure containing additional data for the ``V4L2_DEC_CMD_STOP`` command. * - - - - __u64 - ``pts`` - Stop playback at this ``pts`` or immediately if the playback is already past that timestamp. Leave to 0 if you want to stop after the last frame was decoded. - * - - - struct + * - struct - ``raw`` - - - - * - - - - __u32 - ``data``\ [16] - Reserved for future extensions. Drivers and applications must set the array to zero. + * - } + - diff --git a/Documentation/media/uapi/v4l/vidioc-dqevent.rst b/Documentation/media/uapi/v4l/vidioc-dqevent.rst index 42659a3d1705..2f37d255352a 100644 --- a/Documentation/media/uapi/v4l/vidioc-dqevent.rst +++ b/Documentation/media/uapi/v4l/vidioc-dqevent.rst @@ -55,66 +55,54 @@ call. .. flat-table:: struct v4l2_event :header-rows: 0 :stub-columns: 0 - :widths: 1 1 2 1 + :widths: 1 1 2 * - __u32 - ``type`` - - - Type of the event, see :ref:`event-type`. - * - union + * - union { - ``u`` - - - - - * - - - struct :c:type:`v4l2_event_vsync` + * - struct :c:type:`v4l2_event_vsync` - ``vsync`` - Event data for event ``V4L2_EVENT_VSYNC``. - * - - - struct :c:type:`v4l2_event_ctrl` + * - struct :c:type:`v4l2_event_ctrl` - ``ctrl`` - Event data for event ``V4L2_EVENT_CTRL``. - * - - - struct :c:type:`v4l2_event_frame_sync` + * - struct :c:type:`v4l2_event_frame_sync` - ``frame_sync`` - Event data for event ``V4L2_EVENT_FRAME_SYNC``. - * - - - struct :c:type:`v4l2_event_motion_det` + * - struct :c:type:`v4l2_event_motion_det` - ``motion_det`` - Event data for event V4L2_EVENT_MOTION_DET. - * - - - struct :c:type:`v4l2_event_src_change` + * - struct :c:type:`v4l2_event_src_change` - ``src_change`` - Event data for event V4L2_EVENT_SOURCE_CHANGE. - * - - - __u8 + * - __u8 - ``data``\ [64] - Event data. Defined by the event type. The union should be used to define easily accessible type for events. + * - } + - * - __u32 - ``pending`` - - - Number of pending events excluding this one. * - __u32 - ``sequence`` - - - Event sequence number. The sequence number is incremented for every subscribed event that takes place. If sequence numbers are not contiguous it means that events have been lost. * - struct timespec - ``timestamp`` - - - Event timestamp. The timestamp has been taken from the ``CLOCK_MONOTONIC`` clock. To access the same clock outside V4L2, use :c:func:`clock_gettime`. * - u32 - ``id`` - - - The ID associated with the event source. If the event does not have an associated ID (this depends on the event type), then this is 0. * - __u32 - ``reserved``\ [8] - - - Reserved for future extensions. Drivers must set the array to zero. @@ -233,54 +221,45 @@ call. .. flat-table:: struct v4l2_event_ctrl :header-rows: 0 :stub-columns: 0 - :widths: 1 1 2 1 + :widths: 1 1 2 * - __u32 - ``changes`` - - - A bitmask that tells what has changed. See :ref:`ctrl-changes-flags`. * - __u32 - ``type`` - - - The type of the control. See enum :c:type:`v4l2_ctrl_type`. - * - union (anonymous) - - - - - - - * - - - __s32 + * - union { + - (anonymous) + * - __s32 - ``value`` - The 32-bit value of the control for 32-bit control types. This is 0 for string controls since the value of a string cannot be passed using :ref:`VIDIOC_DQEVENT`. - * - - - __s64 + * - __s64 - ``value64`` - The 64-bit value of the control for 64-bit control types. + * - } + - * - __u32 - ``flags`` - - - The control flags. See :ref:`control-flags`. * - __s32 - ``minimum`` - - - The minimum value of the control. See struct :ref:`v4l2_queryctrl `. * - __s32 - ``maximum`` - - - The maximum value of the control. See struct :ref:`v4l2_queryctrl `. * - __s32 - ``step`` - - - The step value of the control. See struct :ref:`v4l2_queryctrl `. * - __s32 - ``default_value`` - - - The default value value of the control. See struct :ref:`v4l2_queryctrl `. diff --git a/Documentation/media/uapi/v4l/vidioc-dv-timings-cap.rst b/Documentation/media/uapi/v4l/vidioc-dv-timings-cap.rst index e62d45d37072..1d0acbf14c4f 100644 --- a/Documentation/media/uapi/v4l/vidioc-dv-timings-cap.rst +++ b/Documentation/media/uapi/v4l/vidioc-dv-timings-cap.rst @@ -112,7 +112,7 @@ that doesn't support them will return an ``EINVAL`` error code. .. flat-table:: struct v4l2_dv_timings_cap :header-rows: 0 :stub-columns: 0 - :widths: 1 1 2 1 + :widths: 1 1 2 * - __u32 - ``type`` @@ -127,16 +127,14 @@ that doesn't support them will return an ``EINVAL`` error code. - Reserved for future extensions. Drivers and applications must set the array to zero. - * - union - - - - - * - - - struct :c:type:`v4l2_bt_timings_cap` + * - union { + - (anonymous) + * - struct :c:type:`v4l2_bt_timings_cap` - ``bt`` - BT.656/1120 timings capabilities of the hardware. - * - - - __u32 + * - __u32 - ``raw_data``\ [32] + * - } - .. tabularcolumns:: |p{7.0cm}|p{10.5cm}| diff --git a/Documentation/media/uapi/v4l/vidioc-enum-frameintervals.rst b/Documentation/media/uapi/v4l/vidioc-enum-frameintervals.rst index 2c69f26b165d..563a67cddeca 100644 --- a/Documentation/media/uapi/v4l/vidioc-enum-frameintervals.rst +++ b/Documentation/media/uapi/v4l/vidioc-enum-frameintervals.rst @@ -138,36 +138,31 @@ application should zero out all members except for the *IN* fields. * - __u32 - ``index`` - - - IN: Index of the given frame interval in the enumeration. * - __u32 - ``pixel_format`` - - - IN: Pixel format for which the frame intervals are enumerated. * - __u32 - ``width`` - - - IN: Frame width for which the frame intervals are enumerated. * - __u32 - ``height`` - - - IN: Frame height for which the frame intervals are enumerated. * - __u32 - ``type`` - - - OUT: Frame interval type the device supports. - * - union - - - - + * - union { + - (anonymous) - OUT: Frame interval with the given index. - * - - - struct :c:type:`v4l2_fract` + * - struct :c:type:`v4l2_fract` - ``discrete`` - Frame interval [s]. - * - - - struct :c:type:`v4l2_frmival_stepwise` + * - struct :c:type:`v4l2_frmival_stepwise` - ``stepwise`` - + * - } + - + - * - __u32 - ``reserved[2]`` - diff --git a/Documentation/media/uapi/v4l/vidioc-enum-framesizes.rst b/Documentation/media/uapi/v4l/vidioc-enum-framesizes.rst index cf31f548826f..cd97546a7122 100644 --- a/Documentation/media/uapi/v4l/vidioc-enum-framesizes.rst +++ b/Documentation/media/uapi/v4l/vidioc-enum-framesizes.rst @@ -155,31 +155,27 @@ application should zero out all members except for the *IN* fields. * - __u32 - ``index`` - - - IN: Index of the given frame size in the enumeration. * - __u32 - ``pixel_format`` - - - IN: Pixel format for which the frame sizes are enumerated. * - __u32 - ``type`` - - - OUT: Frame size type the device supports. - * - union - - - - + * - union { + - (anonymous) - OUT: Frame size with the given index. - * - - - struct :c:type:`v4l2_frmsize_discrete` + * - struct :c:type:`v4l2_frmsize_discrete` - ``discrete`` - - * - - - struct :c:type:`v4l2_frmsize_stepwise` + * - struct :c:type:`v4l2_frmsize_stepwise` - ``stepwise`` - + * - } + - + - * - __u32 - ``reserved[2]`` - - - Reserved space for future use. Must be zeroed by drivers and applications. diff --git a/Documentation/media/uapi/v4l/vidioc-g-dv-timings.rst b/Documentation/media/uapi/v4l/vidioc-g-dv-timings.rst index 5c675cbac4cf..e36dd2622857 100644 --- a/Documentation/media/uapi/v4l/vidioc-g-dv-timings.rst +++ b/Documentation/media/uapi/v4l/vidioc-g-dv-timings.rst @@ -179,23 +179,21 @@ EBUSY .. flat-table:: struct v4l2_dv_timings :header-rows: 0 :stub-columns: 0 - :widths: 1 1 2 1 + :widths: 1 1 2 * - __u32 - ``type`` - - - Type of DV timings as listed in :ref:`dv-timing-types`. - * - union - - - - - * - - - struct :c:type:`v4l2_bt_timings` + * - union { + - (anonymous) + * - struct :c:type:`v4l2_bt_timings` - ``bt`` - Timings defined by BT.656/1120 specifications - * - - - __u32 + * - __u32 - ``reserved``\ [32] - + * - } + - .. tabularcolumns:: |p{4.4cm}|p{4.4cm}|p{8.7cm}| diff --git a/Documentation/media/uapi/v4l/vidioc-g-ext-ctrls.rst b/Documentation/media/uapi/v4l/vidioc-g-ext-ctrls.rst index 271cac18afbb..cdb2a2a512d6 100644 --- a/Documentation/media/uapi/v4l/vidioc-g-ext-ctrls.rst +++ b/Documentation/media/uapi/v4l/vidioc-g-ext-ctrls.rst @@ -136,15 +136,13 @@ still cause this situation. .. flat-table:: struct v4l2_ext_control :header-rows: 0 :stub-columns: 0 - :widths: 1 1 1 2 + :widths: 1 1 2 * - __u32 - ``id`` - - - Identifies the control, set by the application. * - __u32 - ``size`` - - - The total size in bytes of the payload of this control. This is normally 0, but for pointer controls this should be set to the size of the memory containing the payload, or that will receive @@ -161,55 +159,48 @@ still cause this situation. *length* of the string may well be much smaller. * - __u32 - ``reserved2``\ [1] - - - Reserved for future extensions. Drivers and applications must set the array to zero. - * - union + * - union { - (anonymous) - * - - - __s32 + * - __s32 - ``value`` - New value or current value. Valid if this control is not of type ``V4L2_CTRL_TYPE_INTEGER64`` and ``V4L2_CTRL_FLAG_HAS_PAYLOAD`` is not set. - * - - - __s64 + * - __s64 - ``value64`` - New value or current value. Valid if this control is of type ``V4L2_CTRL_TYPE_INTEGER64`` and ``V4L2_CTRL_FLAG_HAS_PAYLOAD`` is not set. - * - - - char * + * - char * - ``string`` - A pointer to a string. Valid if this control is of type ``V4L2_CTRL_TYPE_STRING``. - * - - - __u8 * + * - __u8 * - ``p_u8`` - A pointer to a matrix control of unsigned 8-bit values. Valid if this control is of type ``V4L2_CTRL_TYPE_U8``. - * - - - __u16 * + * - __u16 * - ``p_u16`` - A pointer to a matrix control of unsigned 16-bit values. Valid if this control is of type ``V4L2_CTRL_TYPE_U16``. - * - - - __u32 * + * - __u32 * - ``p_u32`` - A pointer to a matrix control of unsigned 32-bit values. Valid if this control is of type ``V4L2_CTRL_TYPE_U32``. - * - - - :c:type:`v4l2_area` * + * - :c:type:`v4l2_area` * - ``p_area`` - A pointer to a struct :c:type:`v4l2_area`. Valid if this control is of type ``V4L2_CTRL_TYPE_AREA``. - * - - - void * + * - void * - ``ptr`` - A pointer to a compound type which can be an N-dimensional array and/or a compound type (the control's type is >= ``V4L2_CTRL_COMPOUND_TYPES``). Valid if ``V4L2_CTRL_FLAG_HAS_PAYLOAD`` is set for this control. + * - } + - .. tabularcolumns:: |p{4.0cm}|p{2.2cm}|p{2.1cm}|p{8.2cm}| @@ -221,12 +212,11 @@ still cause this situation. .. flat-table:: struct v4l2_ext_controls :header-rows: 0 :stub-columns: 0 - :widths: 1 1 2 1 + :widths: 1 1 2 - * - union + * - union { - (anonymous) - * - - - __u32 + * - __u32 - ``ctrl_class`` - The control class to which all controls belong, see :ref:`ctrl-class`. Drivers that use a kernel framework for @@ -235,8 +225,7 @@ still cause this situation. support this can be tested by setting ``ctrl_class`` to 0 and calling :ref:`VIDIOC_TRY_EXT_CTRLS ` with a ``count`` of 0. If that succeeds, then the driver supports this feature. - * - - - __u32 + * - __u32 - ``which`` - Which value of the control to get/set/try. ``V4L2_CTRL_WHICH_CUR_VAL`` will return the current value of the @@ -261,6 +250,8 @@ still cause this situation. by setting ctrl_class to ``V4L2_CTRL_WHICH_CUR_VAL`` and calling VIDIOC_TRY_EXT_CTRLS with a count of 0. If that fails, then the driver does not support ``V4L2_CTRL_WHICH_CUR_VAL``. + * - } + - * - __u32 - ``count`` - The number of controls in the controls array. May also be zero. diff --git a/Documentation/media/uapi/v4l/vidioc-g-fmt.rst b/Documentation/media/uapi/v4l/vidioc-g-fmt.rst index e35a9caff652..1e69bfc46e8d 100644 --- a/Documentation/media/uapi/v4l/vidioc-g-fmt.rst +++ b/Documentation/media/uapi/v4l/vidioc-g-fmt.rst @@ -103,51 +103,44 @@ The format as returned by :ref:`VIDIOC_TRY_FMT ` must be identical * - __u32 - ``type`` - - - Type of the data stream, see :c:type:`v4l2_buf_type`. - * - union + * - union { - ``fmt`` - * - - - struct :c:type:`v4l2_pix_format` + * - struct :c:type:`v4l2_pix_format` - ``pix`` - Definition of an image format, see :ref:`pixfmt`, used by video capture and output devices. - * - - - struct :c:type:`v4l2_pix_format_mplane` + * - struct :c:type:`v4l2_pix_format_mplane` - ``pix_mp`` - Definition of an image format, see :ref:`pixfmt`, used by video capture and output devices that support the :ref:`multi-planar version of the API `. - * - - - struct :c:type:`v4l2_window` + * - struct :c:type:`v4l2_window` - ``win`` - Definition of an overlaid image, see :ref:`overlay`, used by video overlay devices. - * - - - struct :c:type:`v4l2_vbi_format` + * - struct :c:type:`v4l2_vbi_format` - ``vbi`` - Raw VBI capture or output parameters. This is discussed in more detail in :ref:`raw-vbi`. Used by raw VBI capture and output devices. - * - - - struct :c:type:`v4l2_sliced_vbi_format` + * - struct :c:type:`v4l2_sliced_vbi_format` - ``sliced`` - Sliced VBI capture or output parameters. See :ref:`sliced` for details. Used by sliced VBI capture and output devices. - * - - - struct :c:type:`v4l2_sdr_format` + * - struct :c:type:`v4l2_sdr_format` - ``sdr`` - Definition of a data format, see :ref:`pixfmt`, used by SDR capture and output devices. - * - - - struct :c:type:`v4l2_meta_format` + * - struct :c:type:`v4l2_meta_format` - ``meta`` - Definition of a metadata format, see :ref:`meta-formats`, used by metadata capture devices. - * - - - __u8 + * - __u8 - ``raw_data``\ [200] - Place holder for future extensions. + * - } + - Return Value diff --git a/Documentation/media/uapi/v4l/vidioc-g-parm.rst b/Documentation/media/uapi/v4l/vidioc-g-parm.rst index d9d5d97848d3..044a459e073f 100644 --- a/Documentation/media/uapi/v4l/vidioc-g-parm.rst +++ b/Documentation/media/uapi/v4l/vidioc-g-parm.rst @@ -69,33 +69,29 @@ union holding separate parameters for input and output devices. .. flat-table:: struct v4l2_streamparm :header-rows: 0 :stub-columns: 0 - :widths: 1 1 1 2 + :widths: 1 1 2 * - __u32 - ``type`` - - - The buffer (stream) type, same as struct :c:type:`v4l2_format` ``type``, set by the application. See :c:type:`v4l2_buf_type`. - * - union + * - union { - ``parm`` - - - - - * - - - struct :c:type:`v4l2_captureparm` + * - struct :c:type:`v4l2_captureparm` - ``capture`` - Parameters for capture devices, used when ``type`` is ``V4L2_BUF_TYPE_VIDEO_CAPTURE`` or ``V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE``. - * - - - struct :c:type:`v4l2_outputparm` + * - struct :c:type:`v4l2_outputparm` - ``output`` - Parameters for output devices, used when ``type`` is ``V4L2_BUF_TYPE_VIDEO_OUTPUT`` or ``V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE``. - * - - - __u8 + * - __u8 - ``raw_data``\ [200] - A place holder for future extensions. + * - } + - diff --git a/Documentation/media/uapi/v4l/vidioc-queryctrl.rst b/Documentation/media/uapi/v4l/vidioc-queryctrl.rst index 22ff3c6abd9c..8971f4cfb16e 100644 --- a/Documentation/media/uapi/v4l/vidioc-queryctrl.rst +++ b/Documentation/media/uapi/v4l/vidioc-queryctrl.rst @@ -290,34 +290,29 @@ See also the examples in :ref:`control`. .. flat-table:: struct v4l2_querymenu :header-rows: 0 :stub-columns: 0 - :widths: 1 1 2 1 + :widths: 1 1 2 * - __u32 - - - ``id`` - Identifies the control, set by the application from the respective struct :ref:`v4l2_queryctrl ` ``id``. * - __u32 - - - ``index`` - Index of the menu item, starting at zero, set by the application. - * - union - - - - - - - * - - - __u8 + * - union { + - (anonymous) + * - __u8 - ``name``\ [32] - Name of the menu item, a NUL-terminated ASCII string. This information is intended for the user. This field is valid for ``V4L2_CTRL_TYPE_MENU`` type controls. - * - - - __s64 + * - __s64 - ``value`` - Value of the integer menu item. This field is valid for ``V4L2_CTRL_TYPE_INTEGER_MENU`` type controls. - * - __u32 + * - } - + * - __u32 - ``reserved`` - Reserved for future extensions. Drivers must set the array to zero. -- cgit v1.2.3 From ecd942a0ef3a30f6037870bfc0a294d7e9fe9d4f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 24 Feb 2020 08:35:47 +0100 Subject: devlink: add ACL generic packet traps Add packet traps that can report packets that were dropped during ACL processing. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- Documentation/networking/devlink/devlink-trap.rst | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst index 47a429bb8658..63350e7332ce 100644 --- a/Documentation/networking/devlink/devlink-trap.rst +++ b/Documentation/networking/devlink/devlink-trap.rst @@ -238,6 +238,12 @@ be added to the following table: - ``drop`` - Traps NVE packets that the device decided to drop because their overlay source MAC is multicast + * - ``ingress_flow_action_drop`` + - ``drop`` + - Traps packets dropped during processing of ingress flow action drop + * - ``egress_flow_action_drop`` + - ``drop`` + - Traps packets dropped during processing of egress flow action drop Driver-specific Packet Traps ============================ @@ -277,6 +283,9 @@ narrow. The description of these groups must be added to the following table: * - ``tunnel_drops`` - Contains packet traps for packets that were dropped by the device during tunnel encapsulation / decapsulation + * - ``acl_drops`` + - Contains packet traps for packets that were dropped by the device during + ACL processing Testing ======= -- cgit v1.2.3 From 04eed745615244525fb918fa7c0a86e811371536 Mon Sep 17 00:00:00 2001 From: Min Li Date: Fri, 21 Feb 2020 16:48:38 -0500 Subject: dt-bindings: ptp: Add device tree binding for IDT 82P33 based PTP clock Add device tree binding doc for the PTP clock based on IDT 82P33 Synchronization Management Unit (SMU). Changes since v1: - As suggested by Rob Herring: 1. Drop reg description for i2c 2. Replace i2c@1 with i2c 3. Add addtionalProperties: false Signed-off-by: Min Li Reviewed-by: Rob Herring Signed-off-by: David S. Miller --- .../devicetree/bindings/ptp/ptp-idt82p33.yaml | 45 ++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 Documentation/devicetree/bindings/ptp/ptp-idt82p33.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/ptp/ptp-idt82p33.yaml b/Documentation/devicetree/bindings/ptp/ptp-idt82p33.yaml new file mode 100644 index 000000000000..9bc664f414a1 --- /dev/null +++ b/Documentation/devicetree/bindings/ptp/ptp-idt82p33.yaml @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/ptp/ptp-idt82p33.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: IDT 82P33 PTP Clock Device Tree Bindings + +description: | + IDT 82P33XXX Synchronization Management Unit (SMU) based PTP clock + +maintainers: + - Min Li + +properties: + compatible: + enum: + - idt,82p33810 + - idt,82p33813 + - idt,82p33814 + - idt,82p33831 + - idt,82p33910 + - idt,82p33913 + - idt,82p33914 + - idt,82p33931 + + reg: + maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + phc@51 { + compatible = "idt,82p33810"; + reg = <0x51>; + }; + }; -- cgit v1.2.3 From 571912c69f0ed731bd1e071ade9dc7ca4aa52065 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Mon, 24 Feb 2020 10:57:50 +0530 Subject: net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc. The Bareudp tunnel module provides a generic L3 encapsulation tunnelling module for tunnelling different protocols like MPLS, IP,NSH etc inside a UDP tunnel. Signed-off-by: Martin Varghese Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/bareudp.rst | 34 ++++++++++++++++++++++++++++++++++ Documentation/networking/index.rst | 1 + 2 files changed, 35 insertions(+) create mode 100644 Documentation/networking/bareudp.rst (limited to 'Documentation') diff --git a/Documentation/networking/bareudp.rst b/Documentation/networking/bareudp.rst new file mode 100644 index 000000000000..6ee68c16cf5b --- /dev/null +++ b/Documentation/networking/bareudp.rst @@ -0,0 +1,34 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================== +Bare UDP Tunnelling Module Documentation +======================================== + +There are various L3 encapsulation standards using UDP being discussed to +leverage the UDP based load balancing capability of different networks. +MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. + +The Bareudp tunnel module provides a generic L3 encapsulation tunnelling +support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside +a UDP tunnel. + +Usage +------ + +1) Device creation & deletion + + a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype 0x8847. + + This creates a bareudp tunnel device which tunnels L3 traffic with ethertype + 0x8847 (MPLS traffic). The destination port of the UDP header will be set to + 6635.The device will listen on UDP port 6635 to receive traffic. + + b) ip link delete bareudp0 + +2) Device Usage + +The bareudp device could be used along with OVS or flower filter in TC. +The OVS or TC flower layer must set the tunnel information in SKB dst field before +sending packet buffer to the bareudp device for transmission. On reception the +bareudp device extracts and stores the tunnel information in SKB dst field before +passing the packet buffer to the network stack. diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index d07d9855dcd3..3a83cfb66704 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -8,6 +8,7 @@ Contents: netdev-FAQ af_xdp + bareudp batman-adv can can_ucan_protocol -- cgit v1.2.3 From 4b5f67232d956a5f837082578ba682410ccab498 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Mon, 24 Feb 2020 10:58:35 +0530 Subject: net: Special handling for IP & MPLS. Special handling is needed in bareudp module for IP & MPLS as they support more than one ethertypes. MPLS has 2 ethertypes. 0x8847 for MPLS unicast and 0x8848 for MPLS multicast. While decapsulating MPLS packet from UDP packet the tunnel destination IP address is checked to determine the ethertype. The ethertype of the packet will be set to 0x8848 if the tunnel destination IP address is a multicast IP address. The ethertype of the packet will be set to 0x8847 if the tunnel destination IP address is a unicast IP address. IP has 2 ethertypes.0x0800 for IPV4 and 0x86dd for IPv6. The version field of the IP header tunnelled will be checked to determine the ethertype. This special handling to tunnel additional ethertypes will be disabled by default and can be enabled using a flag called multiproto. This flag can be used only with ethertypes 0x8847 and 0x0800. Signed-off-by: Martin Varghese Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/bareudp.rst | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/networking/bareudp.rst b/Documentation/networking/bareudp.rst index 6ee68c16cf5b..465a8b251bfe 100644 --- a/Documentation/networking/bareudp.rst +++ b/Documentation/networking/bareudp.rst @@ -12,6 +12,15 @@ The Bareudp tunnel module provides a generic L3 encapsulation tunnelling support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. +Special Handling +---------------- +The bareudp device supports special handling for MPLS & IP as they can have +multiple ethertypes. +MPLS procotcol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). +IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6). +This special handling can be enabled only for ethertypes ETH_P_IP & ETH_P_MPLS_UC +with a flag called multiproto mode. + Usage ------ @@ -25,7 +34,16 @@ Usage b) ip link delete bareudp0 -2) Device Usage +2) Device creation with multiple proto mode enabled + +There are two ways to create a bareudp device for MPLS & IP with multiproto mode +enabled. + + a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype 0x8847 multiproto + + b) ip link add dev bareudp0 type bareudp dstport 6635 ethertype mpls + +3) Device Usage The bareudp device could be used along with OVS or flower filter in TC. The OVS or TC flower layer must set the tunnel information in SKB dst field before -- cgit v1.2.3 From 37b558f6cda6c8cf8045c419e8ed38d2de551016 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 24 Feb 2020 14:15:34 +0200 Subject: dt-bindings: net: dsa: ocelot: document the vsc9959 core This patch adds the required documentation for the embedded L2 switch inside the NXP LS1028A chip. I've submitted it in the legacy format instead of yaml schema, because DSA itself has not yet been converted to yaml, and this driver defines no custom bindings. Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/ocelot.txt | 116 +++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/dsa/ocelot.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/dsa/ocelot.txt b/Documentation/devicetree/bindings/net/dsa/ocelot.txt new file mode 100644 index 000000000000..66a129fea705 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/ocelot.txt @@ -0,0 +1,116 @@ +Microchip Ocelot switch driver family +===================================== + +Felix +----- + +The VSC9959 core is currently the only switch supported by the driver, and is +found in the NXP LS1028A. It is a PCI device, part of the larger ENETC root +complex. As a result, the ethernet-switch node is a sub-node of the PCIe root +complex node and its "reg" property conforms to the parent node bindings: + +* reg: Specifies PCIe Device Number and Function Number of the endpoint device, + in this case for the Ethernet L2Switch it is PF5 (of device 0, bus 0). + +It does not require a "compatible" string. + +The interrupt line is used to signal availability of PTP TX timestamps and for +TSN frame preemption. + +For the external switch ports, depending on board configuration, "phy-mode" and +"phy-handle" are populated by board specific device tree instances. Ports 4 and +5 are fixed as internal ports in the NXP LS1028A instantiation. + +The CPU port property ("ethernet") configures the feature called "NPI port" in +the Ocelot hardware core. The CPU port in Ocelot is a set of queues, which are +connected, in the Node Processor Interface (NPI) mode, to an Ethernet port. +By default, in fsl-ls1028a.dtsi, the NPI port is assigned to the internal +2.5Gbps port@4, but can be moved to the 1Gbps port@5, depending on the specific +use case. Moving the NPI port to an external switch port is hardware possible, +but there is no platform support for the Linux system on the LS1028A chip to +operate as an entire slave DSA chip. NPI functionality (and therefore DSA +tagging) is supported on a single port at a time. + +Any port can be disabled (and in fsl-ls1028a.dtsi, they are indeed all disabled +by default, and should be enabled on a per-board basis). But if any external +switch port is enabled at all, the ENETC PF2 (enetc_port2) should be enabled as +well, regardless of whether it is configured as the DSA master or not. This is +because the Felix PHYLINK implementation accesses the MAC PCS registers, which +in hardware truly belong to the ENETC port #2 and not to Felix. + +Supported PHY interface types (appropriate SerDes protocol setting changes are +needed in the RCW binary): + +* phy_mode = "internal": on ports 4 and 5 +* phy_mode = "sgmii": on ports 0, 1, 2, 3 +* phy_mode = "qsgmii": on ports 0, 1, 2, 3 +* phy_mode = "usxgmii": on ports 0, 1, 2, 3 +* phy_mode = "2500base-x": on ports 0, 1, 2, 3 + +For the rest of the device tree binding definitions, which are standard DSA and +PCI, refer to the following documents: + +Documentation/devicetree/bindings/net/dsa/dsa.txt +Documentation/devicetree/bindings/pci/pci.txt + +Example: + +&soc { + pcie@1f0000000 { /* Integrated Endpoint Root Complex */ + ethernet-switch@0,5 { + reg = <0x000500 0 0 0 0>; + /* IEP INT_B */ + interrupts = ; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + /* External ports */ + port@0 { + reg = <0>; + label = "swp0"; + }; + + port@1 { + reg = <1>; + label = "swp1"; + }; + + port@2 { + reg = <2>; + label = "swp2"; + }; + + port@3 { + reg = <3>; + label = "swp3"; + }; + + /* Tagging CPU port */ + port@4 { + reg = <4>; + ethernet = <&enetc_port2>; + phy-mode = "internal"; + + fixed-link { + speed = <2500>; + full-duplex; + }; + }; + + /* Non-tagging CPU port */ + port@5 { + reg = <5>; + phy-mode = "internal"; + status = "disabled"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + }; + }; +}; -- cgit v1.2.3 From 2f0c17faebde61e7aef0c28914b8c00f98bf0e5f Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Sun, 19 Jan 2020 18:43:19 +0530 Subject: dt-bindings: qcom: Add ipq6018 bindings Document the new ipq6018 SOC/board device tree bindings. Co-developed-by: Selvam Sathappan Periakaruppan Signed-off-by: Selvam Sathappan Periakaruppan Co-developed-by: Sivaprakash Murugesan Signed-off-by: Sivaprakash Murugesan Reviewed-by: Rob Herring Signed-off-by: Sricharan R Link: https://lore.kernel.org/r/1579439601-14810-4-git-send-email-sricharan@codeaurora.org Signed-off-by: Bjorn Andersson --- Documentation/devicetree/bindings/arm/qcom.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/qcom.yaml b/Documentation/devicetree/bindings/arm/qcom.yaml index 5976c0b16b65..64ddae3bd39f 100644 --- a/Documentation/devicetree/bindings/arm/qcom.yaml +++ b/Documentation/devicetree/bindings/arm/qcom.yaml @@ -28,6 +28,7 @@ description: | apq8074 apq8084 apq8096 + ipq6018 ipq8074 mdm9615 msm8916 @@ -41,6 +42,7 @@ description: | The 'board' element must be one of the following strings: cdp + cp01-c1 dragonboard hk01 idp @@ -150,4 +152,10 @@ properties: - enum: - qcom,sc7180-idp - const: qcom,sc7180 + + - items: + - enum: + - qcom,ipq6018-cp01-c1 + - const: qcom,ipq6018 + ... -- cgit v1.2.3 From 43e96ef8b70c50f6054f20b8c357ee5881592082 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 21 Feb 2020 11:48:43 +1100 Subject: docs/core-api: Add Fedora instructions for GCC plugins Add an example of how to install the necessary packages for GCC plugins on Fedora. Signed-off-by: Michael Ellerman Reviewed-by: Kees Cook Signed-off-by: Jonathan Corbet --- Documentation/core-api/gcc-plugins.rst | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Documentation') diff --git a/Documentation/core-api/gcc-plugins.rst b/Documentation/core-api/gcc-plugins.rst index 8502f24396fb..4b1c10f88e30 100644 --- a/Documentation/core-api/gcc-plugins.rst +++ b/Documentation/core-api/gcc-plugins.rst @@ -72,6 +72,10 @@ e.g., on Ubuntu for gcc-4.9:: apt-get install gcc-4.9-plugin-dev +Or on Fedora:: + + dnf install gcc-plugin-devel + Enable a GCC plugin based feature in the kernel config:: CONFIG_GCC_PLUGIN_CYC_COMPLEXITY = y -- cgit v1.2.3 From a3aead706dac19ca504c31ed5d6b3e141addbaec Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 22 Feb 2020 10:00:07 +0100 Subject: docs: gpu: i915.rst: fix warnings due to file renames Fix two warnings due to file rename: WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -function csr support for dmc ./drivers/gpu/drm/i915/intel_csr.c' failed with return code 1 WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -internal ./drivers/gpu/drm/i915/intel_csr.c' failed with return code 2 Fixes: 06d3ff6e7451 ("drm/i915: move intel_csr.[ch] under display/") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet --- Documentation/gpu/i915.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index e539c42a3e78..cc74e24ca3b5 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -207,10 +207,10 @@ DPIO CSR firmware support for DMC ---------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/intel_csr.c +.. kernel-doc:: drivers/gpu/drm/i915/display/intel_csr.c :doc: csr support for dmc -.. kernel-doc:: drivers/gpu/drm/i915/intel_csr.c +.. kernel-doc:: drivers/gpu/drm/i915/display/intel_csr.c :internal: Video BIOS Table (VBT) -- cgit v1.2.3 From 2bd49cb581ed5a5fbd43811b952fe9552b737408 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Fri, 21 Feb 2020 17:55:02 +0100 Subject: docs: sysctl/kernel: document acpi_video_flags Based on the implementation in arch/x86/kernel/acpi/sleep.c, in particular the acpi_sleep_setup() function. Signed-off-by: Stephen Kitt Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 6c0d8c55101c..6586e0e0c11f 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -51,8 +51,15 @@ free space valid for 30 seconds. acpi_video_flags ================ -See Documentation/kernel/power/video.txt, it allows mode of video boot -to be set during run time. +See :doc:`/power/video`. This allows the video resume mode to be set, +in a similar fashion to the ``acpi_sleep`` kernel parameter, by +combining the following values: + += ======= +1 s3_bios +2 s3_mode +4 s3_beep += ======= auto_msgmni -- cgit v1.2.3 From bf347b9da9bbba14b4af845b00d443f24d17d46d Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 19 Feb 2020 12:21:33 -0700 Subject: Documentation: fix a typo for intel_iommu=nobounce "untrusted" was mis-spelled as "unstrusted" Signed-off-by: Alex Hung Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 47cd55e339a5..56bf9b2a9ddf 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1775,7 +1775,7 @@ provided by tboot because it makes the system vulnerable to DMA attacks. nobounce [Default off] - Disable bounce buffer for unstrusted devices such as + Disable bounce buffer for untrusted devices such as the Thunderbolt devices. This will treat the untrusted devices as the trusted ones, hence might expose security risks of DMA attacks. -- cgit v1.2.3 From 021622df556b7213cffec1c0713f093fc7d045e3 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Wed, 19 Feb 2020 16:34:42 +0100 Subject: docs: add a script to check sysctl docs This script allows sysctl documentation to be checked against the kernel source code, to identify missing or obsolete entries. Running it against 5.5 shows for example that sysctl/kernel.rst has two obsolete entries and is missing 52 entries. Signed-off-by: Stephen Kitt Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/sysctl/kernel.rst | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 6586e0e0c11f..1c48ab4bfe30 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -2,6 +2,9 @@ Documentation for /proc/sys/kernel/ =================================== +.. See scripts/check-sysctl-docs to keep this up to date + + Copyright (c) 1998, 1999, Rik van Riel Copyright (c) 2009, Shen Feng -- cgit v1.2.3 From ef45e78fdc11ac1794940c2ff4a6bf3bc4c45372 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 13 Feb 2020 18:23:11 +0530 Subject: docs: kref: Clarify the use of two kref_put() in example code Eventhough the current documentation explains that the reference count gets incremented by both kref_init() and kref_get(), it is often misunderstood that only one instance of kref_put() is needed in the example code. So let's clarify that a bit. Signed-off-by: Manivannan Sadhasivam Signed-off-by: Jonathan Corbet --- Documentation/kref.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kref.txt b/Documentation/kref.txt index 3af384156d7e..c61eea6f1bf2 100644 --- a/Documentation/kref.txt +++ b/Documentation/kref.txt @@ -128,6 +128,10 @@ since we already have a valid pointer that we own a refcount for. The put needs no lock because nothing tries to get the data without already holding a pointer. +In the above example, kref_put() will be called 2 times in both success +and error paths. This is necessary because the reference count got +incremented 2 times by kref_init() and kref_get(). + Note that the "before" in rule 1 is very important. You should never do something like:: -- cgit v1.2.3 From 5c2614e995de07b41eb355155eb5e0e3d593718b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Feb 2020 19:35:28 +0000 Subject: dt-bindings: ARM: Add recent Cortex/Neoverse CPUs The CPU group has been busy since we last updated these bindings... Add definitions for all the new Cortex-A and Neoverse cores now available. Signed-off-by: Robin Murphy Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/arm/cpus.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml index 765af3cbd11f..f61a5a13fb42 100644 --- a/Documentation/devicetree/bindings/arm/cpus.yaml +++ b/Documentation/devicetree/bindings/arm/cpus.yaml @@ -123,11 +123,18 @@ properties: - arm,cortex-a12 - arm,cortex-a15 - arm,cortex-a17 + - arm,cortex-a32 + - arm,cortex-a34 + - arm,cortex-a35 - arm,cortex-a53 - arm,cortex-a55 - arm,cortex-a57 + - arm,cortex-a65 - arm,cortex-a72 - arm,cortex-a73 + - arm,cortex-a75 + - arm,cortex-a76 + - arm,cortex-a77 - arm,cortex-m0 - arm,cortex-m0+ - arm,cortex-m1 @@ -136,6 +143,8 @@ properties: - arm,cortex-r4 - arm,cortex-r5 - arm,cortex-r7 + - arm,neoverse-e1 + - arm,neoverse-n1 - brcm,brahma-b15 - brcm,brahma-b53 - brcm,vulcan -- cgit v1.2.3 From 05f9e9f7c9f477362b5adaa194a33270102abb5b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Feb 2020 19:35:29 +0000 Subject: dt-bindings: ARM: Add recent Cortex/Neoverse PMUs Add new PMU definitions to correspond with the CPU bindings. Signed-off-by: Robin Murphy Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/arm/pmu.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml index 52ae094ce330..cc52195d0e9e 100644 --- a/Documentation/devicetree/bindings/arm/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/pmu.yaml @@ -21,11 +21,20 @@ properties: - enum: - apm,potenza-pmu - arm,armv8-pmuv3 + - arm,neoverse-n1-pmu + - arm,neoverse-e1-pmu + - arm,cortex-a77-pmu + - arm,cortex-a76-pmu + - arm,cortex-a75-pmu - arm,cortex-a73-pmu - arm,cortex-a72-pmu + - arm,cortex-a65-pmu - arm,cortex-a57-pmu + - arm,cortex-a55-pmu - arm,cortex-a53-pmu - arm,cortex-a35-pmu + - arm,cortex-a34-pmu + - arm,cortex-a32-pmu - arm,cortex-a17-pmu - arm,cortex-a15-pmu - arm,cortex-a12-pmu -- cgit v1.2.3 From a8e446e49765bb680ac7681ab334d5baf9d25722 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 21 Feb 2020 19:35:30 +0000 Subject: dt-bindings: ARM: Clean up PMU compatible list The "alpha by vendor, reverse-alpha by model" sorting of compatibles that we seem to have ended up with is decidedly odd. Make it less so. Also copy the comment from the generic "arm,armv8" CPU binding to help clarify that the "arm,armv8-pmuv3" binding is rather intended to be a counterpart to that, for describing implementations without a specific microarchitecture like the AEMv8 software model. Signed-off-by: Robin Murphy Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/arm/pmu.yaml | 50 +++++++++++++------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml index cc52195d0e9e..97df36d301c9 100644 --- a/Documentation/devicetree/bindings/arm/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/pmu.yaml @@ -20,36 +20,36 @@ properties: items: - enum: - apm,potenza-pmu - - arm,armv8-pmuv3 - - arm,neoverse-n1-pmu - - arm,neoverse-e1-pmu - - arm,cortex-a77-pmu - - arm,cortex-a76-pmu - - arm,cortex-a75-pmu - - arm,cortex-a73-pmu - - arm,cortex-a72-pmu - - arm,cortex-a65-pmu - - arm,cortex-a57-pmu - - arm,cortex-a55-pmu - - arm,cortex-a53-pmu - - arm,cortex-a35-pmu - - arm,cortex-a34-pmu - - arm,cortex-a32-pmu - - arm,cortex-a17-pmu - - arm,cortex-a15-pmu - - arm,cortex-a12-pmu - - arm,cortex-a9-pmu - - arm,cortex-a8-pmu - - arm,cortex-a7-pmu - - arm,cortex-a5-pmu - - arm,arm11mpcore-pmu - - arm,arm1176-pmu + - arm,armv8-pmuv3 # Only for s/w models - arm,arm1136-pmu + - arm,arm1176-pmu + - arm,arm11mpcore-pmu + - arm,cortex-a5-pmu + - arm,cortex-a7-pmu + - arm,cortex-a8-pmu + - arm,cortex-a9-pmu + - arm,cortex-a12-pmu + - arm,cortex-a15-pmu + - arm,cortex-a17-pmu + - arm,cortex-a32-pmu + - arm,cortex-a34-pmu + - arm,cortex-a35-pmu + - arm,cortex-a53-pmu + - arm,cortex-a55-pmu + - arm,cortex-a57-pmu + - arm,cortex-a65-pmu + - arm,cortex-a72-pmu + - arm,cortex-a73-pmu + - arm,cortex-a75-pmu + - arm,cortex-a76-pmu + - arm,cortex-a77-pmu + - arm,neoverse-e1-pmu + - arm,neoverse-n1-pmu - brcm,vulcan-pmu - cavium,thunder-pmu + - qcom,krait-pmu - qcom,scorpion-pmu - qcom,scorpion-mp-pmu - - qcom,krait-pmu interrupts: # Don't know how many CPUs, so no constraints to specify -- cgit v1.2.3 From fa194707a90bfb4b57f545c8e9c741894e7f04f8 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Tue, 18 Feb 2020 20:52:59 +0100 Subject: Documentation: fix vxlan typo in mlx5.rst Fix a vxlan typo in the mlx5 driver documentation. Signed-off-by: Hans Wippel Signed-off-by: Saeed Mahameed --- Documentation/networking/device_drivers/mellanox/mlx5.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/mellanox/mlx5.rst b/Documentation/networking/device_drivers/mellanox/mlx5.rst index f575a49790e8..e9b65035cd47 100644 --- a/Documentation/networking/device_drivers/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/mellanox/mlx5.rst @@ -101,7 +101,7 @@ Enabling the driver and kconfig options **External options** ( Choose if the corresponding mlx5 feature is required ) - CONFIG_PTP_1588_CLOCK: When chosen, mlx5 ptp support will be enabled -- CONFIG_VXLAN: When chosen, mlx5 vxaln support will be enabled. +- CONFIG_VXLAN: When chosen, mlx5 vxlan support will be enabled. - CONFIG_MLXFW: When chosen, mlx5 firmware flashing support will be enabled (via devlink and ethtool). Devlink info -- cgit v1.2.3 From a4a417ac829d081fe65e11dc91516f411a5b6655 Mon Sep 17 00:00:00 2001 From: Jitao Shi Date: Tue, 25 Feb 2020 17:40:52 +0800 Subject: dt-bindings: display: mediatek: update dpi supported chips Add descriptions about supported chips, including MT2701 & MT8173 & mt8183 Signed-off-by: Jitao Shi Reviewed-by: Rob Herring Signed-off-by: CK Hu --- Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt index b6a7e7397b8b..58914cf681b8 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt @@ -7,6 +7,7 @@ output bus. Required properties: - compatible: "mediatek,-dpi" + the supported chips are mt2701 , mt8173 and mt8183. - reg: Physical base address and length of the controller's registers - interrupts: The interrupt signal from the function block. - clocks: device clocks -- cgit v1.2.3 From 19e5cef058a0089af4148d79af4658245a6952d2 Mon Sep 17 00:00:00 2001 From: Kamel Bouhara Date: Wed, 15 Jan 2020 13:54:17 +0200 Subject: dt-bindings: i2c: at91: document optional bus recovery properties The at91 I2C controller can support bus recovery by re-assigning SCL and SDA to gpios. Add the optional pinctrl and gpio properties to do so. Signed-off-by: Kamel Bouhara [codrin.ciubotariu@microchip.com: rebased] Signed-off-by: Codrin Ciubotariu Reviewed-by: Rob Herring Acked-by: Ludovic Desroches Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-at91.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/i2c/i2c-at91.txt b/Documentation/devicetree/bindings/i2c/i2c-at91.txt index d4bad86107b8..96c914e048f5 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-at91.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-at91.txt @@ -28,8 +28,13 @@ Optional properties: "atmel,sama5d4-i2c", "atmel,sama5d2-i2c", "microchip,sam9x60-i2c". +- scl-gpios: specify the gpio related to SCL pin +- sda-gpios: specify the gpio related to SDA pin +- pinctrl: add extra pinctrl to configure i2c pins to gpio function for i2c + bus recovery, call it "gpio" state - Child nodes conforming to i2c bus binding + Examples : i2c0: i2c@fff84000 { @@ -64,6 +69,11 @@ i2c0: i2c@f8034600 { clocks = <&flx0>; atmel,fifo-size = <16>; i2c-sda-hold-time-ns = <336>; + pinctrl-names = "default", "gpio"; + pinctrl-0 = <&pinctrl_i2c0>; + pinctrl-1 = <&pinctrl_i2c0_gpio>; + sda-gpios = <&pioA 30 GPIO_ACTIVE_HIGH>; + scl-gpios = <&pioA 31 GPIO_ACTIVE_HIGH>; wm8731: wm8731@1a { compatible = "wm8731"; -- cgit v1.2.3 From a92d083d08b008d0507f1191c3610b3e185309eb Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 26 Feb 2020 13:24:23 +0200 Subject: drm/edid: Add flag to drm_display_info to identify HDMI sinks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The drm_display_info structure contains many fields related to HDMI sinks, but none that identifies if a sink compliant with CEA-861 (EDID) shall be treated as an HDMI sink or a DVI sink. Add such a flag, and populate it according to section 8.3.3 ("DVI/HDMI Device Discrimination") of the HDMI v1.3 specification. Signed-off-by: Laurent Pinchart Reviewed-by: Andrzej Hajda Reviewed-by: Ville Syrjälä Reviewed-by: Daniel Vetter Reviewed-by: Boris Brezillon Acked-by: Sam Ravnborg Tested-by: Sebastian Reichel Reviewed-by: Sebastian Reichel Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20200226112514.12455-4-laurent.pinchart@ideasonboard.com --- Documentation/gpu/todo.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'Documentation') diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 370ac678106e..ccf5e8e34222 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -407,6 +407,20 @@ Contact: Daniel Vetter Level: Intermediate +Replace drm_detect_hdmi_monitor() with drm_display_info.is_hdmi +--------------------------------------------------------------- + +Once EDID is parsed, the monitor HDMI support information is available through +drm_display_info.is_hdmi. Many drivers still call drm_detect_hdmi_monitor() to +retrieve the same information, which is less efficient. + +Audit each individual driver calling drm_detect_hdmi_monitor() and switch to +drm_display_info.is_hdmi if applicable. + +Contact: Laurent Pinchart, respective driver maintainers + +Level: Intermediate + Core refactorings ================= -- cgit v1.2.3 From 0451369bc5c3eda098df27f1638902fae073a0d7 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 26 Feb 2020 13:24:26 +0200 Subject: drm/bridge: Improve overview documentation Clean up the drm_bridge overview documentation, and expand the operations documentation to provide more details on API usage. Signed-off-by: Laurent Pinchart Reviewed-by: Daniel Vetter Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20200226112514.12455-7-laurent.pinchart@ideasonboard.com --- Documentation/gpu/drm-kms-helpers.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index 9668a7fe2408..fe155c6ae175 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -139,11 +139,11 @@ Overview .. kernel-doc:: drivers/gpu/drm/drm_bridge.c :doc: overview -Default bridge callback sequence --------------------------------- +Bridge Operations +----------------- .. kernel-doc:: drivers/gpu/drm/drm_bridge.c - :doc: bridge callbacks + :doc: bridge operations Bridge Helper Reference -- cgit v1.2.3 From 5e20bdf3d3ded5c241650d590781bfc84d6e20ae Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 26 Feb 2020 13:24:40 +0200 Subject: drm: Add helper to create a connector for a chain of bridges Most bridge drivers create a DRM connector to model the connector at the output of the bridge. This model is historical and has worked pretty well so far, but causes several issues: - It prevents supporting more complex display pipelines where DRM connector operations are split over multiple components. For instance a pipeline with a bridge connected to the DDC signals to read EDID data, and another one connected to the HPD signal to detect connection and disconnection, will not be possible to support through this model. - It requires every bridge driver to implement similar connector handling code, resulting in code duplication. - It assumes that a bridge will either be wired to a connector or to another bridge, but doesn't support bridges that can be used in both positions very well (although there is some ad-hoc support for this in the analogix_dp bridge driver). In order to solve these issues, ownership of the connector needs to be moved to the display controller driver. To avoid code duplication in display controller drivers, add a new helper to create and manage a DRM connector backed by a chain of bridges. All connector operations are delegating to the appropriate bridge in the chain. Signed-off-by: Laurent Pinchart Reviewed-by: Boris Brezillon Acked-by: Sam Ravnborg Tested-by: Sebastian Reichel Reviewed-by: Sebastian Reichel Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20200226112514.12455-21-laurent.pinchart@ideasonboard.com --- Documentation/gpu/drm-kms-helpers.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'Documentation') diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index fe155c6ae175..ee730457bf4e 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -145,6 +145,12 @@ Bridge Operations .. kernel-doc:: drivers/gpu/drm/drm_bridge.c :doc: bridge operations +Bridge Connector Helper +----------------------- + +.. kernel-doc:: drivers/gpu/drm/drm_bridge_connector.c + :doc: overview + Bridge Helper Reference ------------------------- @@ -155,6 +161,12 @@ Bridge Helper Reference .. kernel-doc:: drivers/gpu/drm/drm_bridge.c :export: +Bridge Connector Helper Reference +--------------------------------- + +.. kernel-doc:: drivers/gpu/drm/drm_bridge_connector.c + :export: + Panel-Bridge Helper Reference ----------------------------- -- cgit v1.2.3 From 238415f76084080f3085e72ead4b76775d21fc46 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 Feb 2020 16:37:45 +0100 Subject: dt-bindings: example-schema: Drop double quotes around URLs It is no longer needed to wrap URLs in double quotes. Signed-off-by: Geert Uytterhoeven Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/example-schema.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/example-schema.yaml b/Documentation/devicetree/bindings/example-schema.yaml index 4ddcf709cc3c..62811a1b5058 100644 --- a/Documentation/devicetree/bindings/example-schema.yaml +++ b/Documentation/devicetree/bindings/example-schema.yaml @@ -7,9 +7,9 @@ # $id is a unique identifier based on the filename. There may or may not be a # file present at the URL. -$id: "http://devicetree.org/schemas/example-schema.yaml#" +$id: http://devicetree.org/schemas/example-schema.yaml# # $schema is the meta-schema this schema should be validated with. -$schema: "http://devicetree.org/meta-schemas/core.yaml#" +$schema: http://devicetree.org/meta-schemas/core.yaml# title: An example schema annotated with jsonschema details -- cgit v1.2.3 From 8acbbddcf9913149ef47b20f487289da02c4a291 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 Feb 2020 16:41:46 +0100 Subject: dt-bindings: ata: rcar-sata: Convert to json-schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the Renesas R-Car Serial-ATA Device Tree binding documentation to json-schema. While at it: - Remove the deprecated "renesas,rcar-sata" compatible value, - Add "iommus", "power-domains", and "resets" properties, - Update the example. Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Signed-off-by: Rob Herring --- .../devicetree/bindings/ata/renesas,rcar-sata.yaml | 71 ++++++++++++++++++++++ .../devicetree/bindings/ata/sata_rcar.txt | 36 ----------- 2 files changed, 71 insertions(+), 36 deletions(-) create mode 100644 Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml delete mode 100644 Documentation/devicetree/bindings/ata/sata_rcar.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml new file mode 100644 index 000000000000..7b69831060d8 --- /dev/null +++ b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml @@ -0,0 +1,71 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/ata/renesas,rcar-sata.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Renesas R-Car Serial-ATA Interface + +maintainers: + - Geert Uytterhoeven + +properties: + compatible: + oneOf: + - items: + - enum: + - renesas,sata-r8a7779 # R-Car H1 + - items: + - enum: + - renesas,sata-r8a7790-es1 # R-Car H2 ES1 + - renesas,sata-r8a7790 # R-Car H2 other than ES1 + - renesas,sata-r8a7791 # R-Car M2-W + - renesas,sata-r8a7793 # R-Car M2-N + - const: renesas,rcar-gen2-sata # generic R-Car Gen2 + - items: + - enum: + - renesas,sata-r8a774b1 # RZ/G2N + - renesas,sata-r8a7795 # R-Car H3 + - renesas,sata-r8a77965 # R-Car M3-N + - const: renesas,rcar-gen3-sata # generic R-Car Gen3 or RZ/G2 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + iommus: + maxItems: 1 + + power-domains: + maxItems: 1 + + resets: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +additionalProperties: false + +examples: + - | + #include + #include + #include + + sata@ee300000 { + compatible = "renesas,sata-r8a7791", "renesas,rcar-gen2-sata"; + reg = <0xee300000 0x200000>; + interrupts = ; + clocks = <&cpg CPG_MOD 815>; + power-domains = <&sysc R8A7791_PD_ALWAYS_ON>; + resets = <&cpg 815>; + }; diff --git a/Documentation/devicetree/bindings/ata/sata_rcar.txt b/Documentation/devicetree/bindings/ata/sata_rcar.txt deleted file mode 100644 index a2fbdc91570d..000000000000 --- a/Documentation/devicetree/bindings/ata/sata_rcar.txt +++ /dev/null @@ -1,36 +0,0 @@ -* Renesas R-Car SATA - -Required properties: -- compatible : should contain one or more of the following: - - "renesas,sata-r8a774b1" for RZ/G2N - - "renesas,sata-r8a7779" for R-Car H1 - - "renesas,sata-r8a7790-es1" for R-Car H2 ES1 - - "renesas,sata-r8a7790" for R-Car H2 other than ES1 - - "renesas,sata-r8a7791" for R-Car M2-W - - "renesas,sata-r8a7793" for R-Car M2-N - - "renesas,sata-r8a7795" for R-Car H3 - - "renesas,sata-r8a77965" for R-Car M3-N - - "renesas,rcar-gen2-sata" for a generic R-Car Gen2 - compatible device - - "renesas,rcar-gen3-sata" for a generic R-Car Gen3 or - RZ/G2 compatible device - - "renesas,rcar-sata" is deprecated - - When compatible with the generic version nodes - must list the SoC-specific version corresponding - to the platform first followed by the generic - version. - -- reg : address and length of the SATA registers; -- interrupts : must consist of one interrupt specifier. -- clocks : must contain a reference to the functional clock. - -Example: - -sata0: sata@ee300000 { - compatible = "renesas,sata-r8a7791", "renesas,rcar-gen2-sata"; - reg = <0 0xee300000 0 0x2000>; - interrupt-parent = <&gic>; - interrupts = <0 105 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&mstp8_clks R8A7791_CLK_SATA0>; -}; -- cgit v1.2.3 From 4f325e8c83e752b84d2c10b9234c5518c198e15e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 21 Feb 2020 11:11:36 +0900 Subject: dt-bindings: serial: Convert UniPhier UART to json-schema Convert the UniPhier UART binding to DT schema format. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../bindings/serial/socionext,uniphier-uart.yaml | 46 ++++++++++++++++++++++ .../devicetree/bindings/serial/uniphier-uart.txt | 22 ----------- 2 files changed, 46 insertions(+), 22 deletions(-) create mode 100644 Documentation/devicetree/bindings/serial/socionext,uniphier-uart.yaml delete mode 100644 Documentation/devicetree/bindings/serial/uniphier-uart.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/serial/socionext,uniphier-uart.yaml b/Documentation/devicetree/bindings/serial/socionext,uniphier-uart.yaml new file mode 100644 index 000000000000..09a30300850c --- /dev/null +++ b/Documentation/devicetree/bindings/serial/socionext,uniphier-uart.yaml @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/serial/socionext,uniphier-uart.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: UniPhier UART controller + +maintainers: + - Masahiro Yamada + +properties: + compatible: + const: socionext,uniphier-uart + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + minItems: 1 + + auto-flow-control: + description: enable automatic flow control support. + $ref: /schemas/types.yaml#/definitions/flag + +required: + - compatible + - reg + - interrupts + - clocks + +examples: + - | + aliases { + serial0 = &serial0; + }; + + serial0: serial@54006800 { + compatible = "socionext,uniphier-uart"; + reg = <0x54006800 0x40>; + interrupts = <0 33 4>; + clocks = <&uart_clk>; + }; diff --git a/Documentation/devicetree/bindings/serial/uniphier-uart.txt b/Documentation/devicetree/bindings/serial/uniphier-uart.txt deleted file mode 100644 index 7a1bf02bb869..000000000000 --- a/Documentation/devicetree/bindings/serial/uniphier-uart.txt +++ /dev/null @@ -1,22 +0,0 @@ -UniPhier UART controller - -Required properties: -- compatible: should be "socionext,uniphier-uart". -- reg: offset and length of the register set for the device. -- interrupts: a single interrupt specifier. -- clocks: phandle to the input clock. - -Optional properties: --auto-flow-control: enable automatic flow control support. - -Example: - aliases { - serial0 = &serial0; - }; - - serial0: serial@54006800 { - compatible = "socionext,uniphier-uart"; - reg = <0x54006800 0x40>; - interrupts = <0 33 4>; - clocks = <&uart_clk>; - }; -- cgit v1.2.3 From b279cac91583a75295b4fa1ba5f6591683b69fb4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 21 Feb 2020 11:55:33 +0900 Subject: dt-bindings: i2c: Convert UniPhier I2C controller to json-schema Convert the UniPhier I2C controller (FIFO-less) binding to DT schema format. There are two types of I2C controllers used on the UniPhier platform. This is the legacy one without FIFO support, which is used on the sLD8 SoC or older. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../devicetree/bindings/i2c/i2c-uniphier.txt | 25 ----------- .../bindings/i2c/socionext,uniphier-i2c.yaml | 50 ++++++++++++++++++++++ 2 files changed, 50 insertions(+), 25 deletions(-) delete mode 100644 Documentation/devicetree/bindings/i2c/i2c-uniphier.txt create mode 100644 Documentation/devicetree/bindings/i2c/socionext,uniphier-i2c.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/i2c/i2c-uniphier.txt b/Documentation/devicetree/bindings/i2c/i2c-uniphier.txt deleted file mode 100644 index 26f9d95b3436..000000000000 --- a/Documentation/devicetree/bindings/i2c/i2c-uniphier.txt +++ /dev/null @@ -1,25 +0,0 @@ -UniPhier I2C controller (FIFO-less) - -Required properties: -- compatible: should be "socionext,uniphier-i2c". -- #address-cells: should be 1. -- #size-cells: should be 0. -- reg: offset and length of the register set for the device. -- interrupts: a single interrupt specifier. -- clocks: phandle to the input clock. - -Optional properties: -- clock-frequency: desired I2C bus frequency in Hz. The maximum supported - value is 400000. Defaults to 100000 if not specified. - -Examples: - - i2c0: i2c@58400000 { - compatible = "socionext,uniphier-i2c"; - reg = <0x58400000 0x40>; - #address-cells = <1>; - #size-cells = <0>; - interrupts = <0 41 1>; - clocks = <&i2c_clk>; - clock-frequency = <100000>; - }; diff --git a/Documentation/devicetree/bindings/i2c/socionext,uniphier-i2c.yaml b/Documentation/devicetree/bindings/i2c/socionext,uniphier-i2c.yaml new file mode 100644 index 000000000000..ef998def554e --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/socionext,uniphier-i2c.yaml @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/i2c/socionext,uniphier-i2c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: UniPhier I2C controller (FIFO-less) + +maintainers: + - Masahiro Yamada + +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + +properties: + compatible: + const: socionext,uniphier-i2c + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-frequency: + minimum: 100000 + maximum: 400000 + +required: + - compatible + - reg + - "#address-cells" + - "#size-cells" + - interrupts + - clocks + +examples: + - | + i2c0: i2c@58400000 { + compatible = "socionext,uniphier-i2c"; + reg = <0x58400000 0x40>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = <0 41 1>; + clocks = <&i2c_clk>; + clock-frequency = <100000>; + }; -- cgit v1.2.3 From 4abfe6f04d93e4aac202007486266bd1a904cc43 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 21 Feb 2020 11:55:34 +0900 Subject: dt-bindings: i2c: Convert UniPhier FI2C controller to json-schema Convert the UniPhier FIFO-builtin I2C (a.k.a. FIFO) controller binding to DT schema format. There are two types of I2C controllers used on the UniPhier platform. This is the new one with FIFO support, which is used on the Pro4 SoC or newer. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../devicetree/bindings/i2c/i2c-uniphier-f.txt | 25 ----------- .../bindings/i2c/socionext,uniphier-fi2c.yaml | 50 ++++++++++++++++++++++ 2 files changed, 50 insertions(+), 25 deletions(-) delete mode 100644 Documentation/devicetree/bindings/i2c/i2c-uniphier-f.txt create mode 100644 Documentation/devicetree/bindings/i2c/socionext,uniphier-fi2c.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/i2c/i2c-uniphier-f.txt b/Documentation/devicetree/bindings/i2c/i2c-uniphier-f.txt deleted file mode 100644 index 27fc6f8c798b..000000000000 --- a/Documentation/devicetree/bindings/i2c/i2c-uniphier-f.txt +++ /dev/null @@ -1,25 +0,0 @@ -UniPhier I2C controller (FIFO-builtin) - -Required properties: -- compatible: should be "socionext,uniphier-fi2c". -- #address-cells: should be 1. -- #size-cells: should be 0. -- reg: offset and length of the register set for the device. -- interrupts: a single interrupt specifier. -- clocks: phandle to the input clock. - -Optional properties: -- clock-frequency: desired I2C bus frequency in Hz. The maximum supported - value is 400000. Defaults to 100000 if not specified. - -Examples: - - i2c0: i2c@58780000 { - compatible = "socionext,uniphier-fi2c"; - reg = <0x58780000 0x80>; - #address-cells = <1>; - #size-cells = <0>; - interrupts = <0 41 4>; - clocks = <&i2c_clk>; - clock-frequency = <100000>; - }; diff --git a/Documentation/devicetree/bindings/i2c/socionext,uniphier-fi2c.yaml b/Documentation/devicetree/bindings/i2c/socionext,uniphier-fi2c.yaml new file mode 100644 index 000000000000..15abc022968e --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/socionext,uniphier-fi2c.yaml @@ -0,0 +1,50 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/i2c/socionext,uniphier-fi2c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: UniPhier I2C controller (FIFO-builtin) + +maintainers: + - Masahiro Yamada + +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + +properties: + compatible: + const: socionext,uniphier-fi2c + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-frequency: + minimum: 100000 + maximum: 400000 + +required: + - compatible + - reg + - "#address-cells" + - "#size-cells" + - interrupts + - clocks + +examples: + - | + i2c0: i2c@58780000 { + compatible = "socionext,uniphier-fi2c"; + reg = <0x58780000 0x80>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = <0 41 4>; + clocks = <&i2c_clk>; + clock-frequency = <100000>; + }; -- cgit v1.2.3 From f87cdb1f9937e6f5234e3300804ac156e639bc00 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Wed, 26 Feb 2020 07:03:03 -0600 Subject: ASoC: dt-bindings: Add TAS2563 compatible to the TAS2562 binding Add the Texas Instruments TAS2563 audio amplifier to the TAS262 binding. Signed-off-by: Dan Murphy CC: Rob Herring Link: https://lore.kernel.org/r/20200226130305.12043-1-dmurphy@ti.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/tas2562.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/sound/tas2562.txt b/Documentation/devicetree/bindings/sound/tas2562.txt index 658e1fb18a99..94796b547184 100644 --- a/Documentation/devicetree/bindings/sound/tas2562.txt +++ b/Documentation/devicetree/bindings/sound/tas2562.txt @@ -8,7 +8,7 @@ real time monitoring of loudspeaker behavior. Required properties: - #address-cells - Should be <1>. - #size-cells - Should be <0>. - - compatible: - Should contain "ti,tas2562". + - compatible: - Should contain "ti,tas2562", "ti,tas2563". - reg: - The i2c address. Should be 0x4c, 0x4d, 0x4e or 0x4f. - ti,imon-slot-no:- TDM TX current sense time slot. -- cgit v1.2.3 From b5f25304aece9f2e7eaab275bbb5461c666bf38c Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Wed, 19 Feb 2020 17:37:11 +0100 Subject: regulator: add smb208 support Smb208 regulators are used on some ipq806x soc. Add support for it to make it avaiable on some routers that use it. Signed-off-by: Ansuel Smith Signed-off-by: Adrian Panella Acked-by: Lee Jones Link: https://lore.kernel.org/r/20200219163711.479-1-ansuelsmth@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/mfd/qcom-rpm.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/mfd/qcom-rpm.txt b/Documentation/devicetree/bindings/mfd/qcom-rpm.txt index 3c91ad430eea..b823b8625243 100644 --- a/Documentation/devicetree/bindings/mfd/qcom-rpm.txt +++ b/Documentation/devicetree/bindings/mfd/qcom-rpm.txt @@ -61,6 +61,7 @@ Regulator nodes are identified by their compatible: "qcom,rpm-pm8901-regulators" "qcom,rpm-pm8921-regulators" "qcom,rpm-pm8018-regulators" + "qcom,rpm-smb208-regulators" - vdd_l0_l1_lvs-supply: - vdd_l2_l11_l12-supply: @@ -171,6 +172,9 @@ pm8018: s1, s2, s3, s4, s5, , l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l14, lvs1 +smb208: + s1a, s1b, s2a, s2b + The content of each sub-node is defined by the standard binding for regulators - see regulator.txt - with additional custom properties described below: -- cgit v1.2.3 From 4dcd55fd703183fe64c570c53cd7f8c9177f879c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 22 Feb 2020 15:04:33 +0900 Subject: dt-bindings: arm: Convert UniPhier board/SoC bindings to json-schema Convert the Socionext UniPhier board/SoC binding to DT schema format. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../devicetree/bindings/arm/socionext/uniphier.txt | 47 ----------------- .../bindings/arm/socionext/uniphier.yaml | 61 ++++++++++++++++++++++ 2 files changed, 61 insertions(+), 47 deletions(-) delete mode 100644 Documentation/devicetree/bindings/arm/socionext/uniphier.txt create mode 100644 Documentation/devicetree/bindings/arm/socionext/uniphier.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/socionext/uniphier.txt b/Documentation/devicetree/bindings/arm/socionext/uniphier.txt deleted file mode 100644 index b3ed1033740e..000000000000 --- a/Documentation/devicetree/bindings/arm/socionext/uniphier.txt +++ /dev/null @@ -1,47 +0,0 @@ -Socionext UniPhier SoC family ------------------------------ - -Required properties in the root node: - - compatible: should contain board and SoC compatible strings - -SoC and board compatible strings: - (sorted chronologically) - - - LD4 SoC: "socionext,uniphier-ld4" - - Reference Board: "socionext,uniphier-ld4-ref" - - - Pro4 SoC: "socionext,uniphier-pro4" - - Reference Board: "socionext,uniphier-pro4-ref" - - Ace Board: "socionext,uniphier-pro4-ace" - - Sanji Board: "socionext,uniphier-pro4-sanji" - - - sLD8 SoC: "socionext,uniphier-sld8" - - Reference Board: "socionext,uniphier-sld8-ref" - - - PXs2 SoC: "socionext,uniphier-pxs2" - - Gentil Board: "socionext,uniphier-pxs2-gentil" - - Vodka Board: "socionext,uniphier-pxs2-vodka" - - - LD6b SoC: "socionext,uniphier-ld6b" - - Reference Board: "socionext,uniphier-ld6b-ref" - - - LD11 SoC: "socionext,uniphier-ld11" - - Reference Board: "socionext,uniphier-ld11-ref" - - Global Board: "socionext,uniphier-ld11-global" - - - LD20 SoC: "socionext,uniphier-ld20" - - Reference Board: "socionext,uniphier-ld20-ref" - - Global Board: "socionext,uniphier-ld20-global" - - - PXs3 SoC: "socionext,uniphier-pxs3" - - Reference Board: "socionext,uniphier-pxs3-ref" - -Example: - -/dts-v1/; - -/ { - compatible = "socionext,uniphier-ld20-ref", "socionext,uniphier-ld20"; - - ... -}; diff --git a/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml b/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml new file mode 100644 index 000000000000..65ad6d8a3c99 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/socionext/uniphier.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Socionext UniPhier platform device tree bindings + +maintainers: + - Masahiro Yamada + +properties: + $nodename: + const: / + compatible: + oneOf: + - description: LD4 SoC boards + items: + - enum: + - socionext,uniphier-ld4-ref + - const: socionext,uniphier-ld4 + - description: Pro4 SoC boards + items: + - enum: + - socionext,uniphier-pro4-ace + - socionext,uniphier-pro4-ref + - socionext,uniphier-pro4-sanji + - const: socionext,uniphier-pro4 + - description: sLD8 SoC boards + items: + - enum: + - socionext,uniphier-sld8-ref + - const: socionext,uniphier-sld8 + - description: PXs2 SoC boards + items: + - enum: + - socionext,uniphier-pxs2-gentil + - socionext,uniphier-pxs2-vodka + - const: socionext,uniphier-pxs2 + - description: LD6b SoC boards + items: + - enum: + - socionext,uniphier-ld6b-ref + - const: socionext,uniphier-ld6b + - description: LD11 SoC boards + items: + - enum: + - socionext,uniphier-ld11-global + - socionext,uniphier-ld11-ref + - const: socionext,uniphier-ld11 + - description: LD20 SoC boards + items: + - enum: + - socionext,uniphier-ld20-global + - socionext,uniphier-ld20-ref + - const: socionext,uniphier-ld20 + - description: PXs3 SoC boards + items: + - enum: + - socionext,uniphier-pxs3-ref + - const: socionext,uniphier-pxs3 -- cgit v1.2.3 From 89099d1432843a7a6bbaac6d33e37365c9fe27ad Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 22 Feb 2020 15:04:34 +0900 Subject: dt-bindings: gpio: Convert UniPhier GPIO to json-schema Convert the UniPhier GPIO controller binding to DT schema format. I omitted the 'gpio-ranges' property because it is defined in the dt-schema project (/schemas/gpio/gpio.yaml). As of writing, the 'gpio-ranges-group-names' is not defined in that file despite it is a common property described in Documentation/devicetree/bindings/gpio/gpio.txt So, I defined it in this schema. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../devicetree/bindings/gpio/gpio-uniphier.txt | 51 ------------ .../bindings/gpio/socionext,uniphier-gpio.yaml | 92 ++++++++++++++++++++++ 2 files changed, 92 insertions(+), 51 deletions(-) delete mode 100644 Documentation/devicetree/bindings/gpio/gpio-uniphier.txt create mode 100644 Documentation/devicetree/bindings/gpio/socionext,uniphier-gpio.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt b/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt deleted file mode 100644 index f281f12dac18..000000000000 --- a/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt +++ /dev/null @@ -1,51 +0,0 @@ -UniPhier GPIO controller - -Required properties: -- compatible: Should be "socionext,uniphier-gpio". -- reg: Specifies offset and length of the register set for the device. -- gpio-controller: Marks the device node as a GPIO controller. -- #gpio-cells: Should be 2. The first cell is the pin number and the second - cell is used to specify optional parameters. -- interrupt-controller: Marks the device node as an interrupt controller. -- #interrupt-cells: Should be 2. The first cell defines the interrupt number. - The second cell bits[3:0] is used to specify trigger type as follows: - 1 = low-to-high edge triggered - 2 = high-to-low edge triggered - 4 = active high level-sensitive - 8 = active low level-sensitive - Valid combinations are 1, 2, 3, 4, 8. -- ngpios: Specifies the number of GPIO lines. -- gpio-ranges: Mapping to pin controller pins (as described in gpio.txt) -- socionext,interrupt-ranges: Specifies an interrupt number mapping between - this GPIO controller and its interrupt parent, in the form of arbitrary - number of triplets. - -Optional properties: -- gpio-ranges-group-names: Used for named gpio ranges (as described in gpio.txt) - -Example: - gpio: gpio@55000000 { - compatible = "socionext,uniphier-gpio"; - reg = <0x55000000 0x200>; - interrupt-parent = <&aidet>; - interrupt-controller; - #interrupt-cells = <2>; - gpio-controller; - #gpio-cells = <2>; - gpio-ranges = <&pinctrl 0 0 0>; - gpio-ranges-group-names = "gpio_range"; - ngpios = <248>; - socionext,interrupt-ranges = <0 48 16>, <16 154 5>, <21 217 3>; - }; - -Consumer Example: - - sdhci0_pwrseq { - compatible = "mmc-pwrseq-emmc"; - reset-gpios = <&gpio UNIPHIER_GPIO_PORT(29, 4) GPIO_ACTIVE_LOW>; - }; - -Please note UNIPHIER_GPIO_PORT(29, 4) represents PORT294 in the SoC document. -Unfortunately, only the one's place is octal in the port numbering. (That is, -PORT 8, 9, 18, 19, 28, 29, ... are missing.) UNIPHIER_GPIO_PORT() is a helper -macro to calculate 29 * 8 + 4. diff --git a/Documentation/devicetree/bindings/gpio/socionext,uniphier-gpio.yaml b/Documentation/devicetree/bindings/gpio/socionext,uniphier-gpio.yaml new file mode 100644 index 000000000000..580a39e09d39 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/socionext,uniphier-gpio.yaml @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/gpio/socionext,uniphier-gpio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: UniPhier GPIO controller + +maintainers: + - Masahiro Yamada + +properties: + $nodename: + pattern: "^gpio@[0-9a-f]+$" + + compatible: + const: socionext,uniphier-gpio + + reg: + maxItems: 1 + + gpio-controller: true + + "#gpio-cells": + const: 2 + + interrupt-controller: true + + "#interrupt-cells": + description: | + The first cell defines the interrupt number. + The second cell bits[3:0] is used to specify trigger type as follows: + 1 = low-to-high edge triggered + 2 = high-to-low edge triggered + 4 = active high level-sensitive + 8 = active low level-sensitive + Valid combinations are 1, 2, 3, 4, 8. + const: 2 + + ngpios: + minimum: 0 + maximum: 512 + + gpio-ranges-group-names: + $ref: /schemas/types.yaml#/definitions/string-array + + socionext,interrupt-ranges: + description: | + Specifies an interrupt number mapping between this GPIO controller and + its interrupt parent, in the form of arbitrary number of + triplets. + $ref: /schemas/types.yaml#/definitions/uint32-matrix + +required: + - compatible + - reg + - gpio-controller + - "#gpio-cells" + - interrupt-controller + - "#interrupt-cells" + - ngpios + - gpio-ranges + - socionext,interrupt-ranges + +examples: + - | + #include + #include + + gpio: gpio@55000000 { + compatible = "socionext,uniphier-gpio"; + reg = <0x55000000 0x200>; + interrupt-parent = <&aidet>; + interrupt-controller; + #interrupt-cells = <2>; + gpio-controller; + #gpio-cells = <2>; + gpio-ranges = <&pinctrl 0 0 0>; + gpio-ranges-group-names = "gpio_range"; + ngpios = <248>; + socionext,interrupt-ranges = <0 48 16>, <16 154 5>, <21 217 3>; + }; + + // Consumer: + // Please note UNIPHIER_GPIO_PORT(29, 4) represents PORT294 in the SoC + // document. Unfortunately, only the one's place is octal in the port + // numbering. (That is, PORT 8, 9, 18, 19, 28, 29, ... do not exist.) + // UNIPHIER_GPIO_PORT() is a helper macro to calculate 29 * 8 + 4. + sdhci0_pwrseq { + compatible = "mmc-pwrseq-emmc"; + reset-gpios = <&gpio UNIPHIER_GPIO_PORT(29, 4) GPIO_ACTIVE_LOW>; + }; -- cgit v1.2.3 From 849b10446bf939eba3d8955c89c4433f8529248e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 22 Feb 2020 15:04:35 +0900 Subject: dt-bindings: pinctrl: Convert UniPhier pin controller to json-schema Convert the UniPhier pin controller binding to DT schema format. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../pinctrl/socionext,uniphier-pinctrl.txt | 27 -------------- .../pinctrl/socionext,uniphier-pinctrl.yaml | 42 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 27 deletions(-) delete mode 100644 Documentation/devicetree/bindings/pinctrl/socionext,uniphier-pinctrl.txt create mode 100644 Documentation/devicetree/bindings/pinctrl/socionext,uniphier-pinctrl.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/pinctrl/socionext,uniphier-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/socionext,uniphier-pinctrl.txt deleted file mode 100644 index 8173b12138ad..000000000000 --- a/Documentation/devicetree/bindings/pinctrl/socionext,uniphier-pinctrl.txt +++ /dev/null @@ -1,27 +0,0 @@ -UniPhier SoCs pin controller - -Required properties: -- compatible: should be one of the following: - "socionext,uniphier-ld4-pinctrl" - for LD4 SoC - "socionext,uniphier-pro4-pinctrl" - for Pro4 SoC - "socionext,uniphier-sld8-pinctrl" - for sLD8 SoC - "socionext,uniphier-pro5-pinctrl" - for Pro5 SoC - "socionext,uniphier-pxs2-pinctrl" - for PXs2 SoC - "socionext,uniphier-ld6b-pinctrl" - for LD6b SoC - "socionext,uniphier-ld11-pinctrl" - for LD11 SoC - "socionext,uniphier-ld20-pinctrl" - for LD20 SoC - "socionext,uniphier-pxs3-pinctrl" - for PXs3 SoC - -Note: -The UniPhier pinctrl should be a subnode of a "syscon" compatible node. - -Example: - soc-glue@5f800000 { - compatible = "socionext,uniphier-pro4-soc-glue", - "simple-mfd", "syscon"; - reg = <0x5f800000 0x2000>; - - pinctrl: pinctrl { - compatible = "socionext,uniphier-pro4-pinctrl"; - }; - }; diff --git a/Documentation/devicetree/bindings/pinctrl/socionext,uniphier-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/socionext,uniphier-pinctrl.yaml new file mode 100644 index 000000000000..f8a93d8680f9 --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/socionext,uniphier-pinctrl.yaml @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pinctrl/socionext,uniphier-pinctrl.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: UniPhier SoCs pin controller + +maintainers: + - Masahiro Yamada + +properties: + $nodename: + pattern: "pinctrl" + + compatible: + enum: + - socionext,uniphier-ld4-pinctrl + - socionext,uniphier-pro4-pinctrl + - socionext,uniphier-sld8-pinctrl + - socionext,uniphier-pro5-pinctrl + - socionext,uniphier-pxs2-pinctrl + - socionext,uniphier-ld6b-pinctrl + - socionext,uniphier-ld11-pinctrl + - socionext,uniphier-ld20-pinctrl + - socionext,uniphier-pxs3-pinctrl + +required: + - compatible + +examples: + - | + // The UniPhier pinctrl should be a subnode of a "syscon" compatible node. + + soc-glue@5f800000 { + compatible = "socionext,uniphier-pro4-soc-glue", "simple-mfd", "syscon"; + reg = <0x5f800000 0x2000>; + + pinctrl: pinctrl { + compatible = "socionext,uniphier-pro4-pinctrl"; + }; + }; -- cgit v1.2.3 From 7f44205eb28ce1e7df947a3a2acd7da678c62940 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 22 Feb 2020 15:11:09 +0900 Subject: dt-bindings: bus: Convert UniPhier System Bus to json-schema Convert the UniPhier System Bus controller binding to DT schema format. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../bus/socionext,uniphier-system-bus.yaml | 96 ++++++++++++++++++++++ .../bindings/bus/uniphier-system-bus.txt | 66 --------------- 2 files changed, 96 insertions(+), 66 deletions(-) create mode 100644 Documentation/devicetree/bindings/bus/socionext,uniphier-system-bus.yaml delete mode 100644 Documentation/devicetree/bindings/bus/uniphier-system-bus.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/bus/socionext,uniphier-system-bus.yaml b/Documentation/devicetree/bindings/bus/socionext,uniphier-system-bus.yaml new file mode 100644 index 000000000000..ff9600d6de3b --- /dev/null +++ b/Documentation/devicetree/bindings/bus/socionext,uniphier-system-bus.yaml @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/bus/socionext,uniphier-system-bus.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: UniPhier System Bus + +description: | + The UniPhier System Bus is an external bus that connects on-board devices to + the UniPhier SoC. It is a simple (semi-)parallel bus with address, data, and + some control signals. It supports up to 8 banks (chip selects). + + Before any access to the bus, the bus controller must be configured; the bus + controller registers provide the control for the translation from the offset + within each bank to the CPU-viewed address. The needed setup includes the + base address, the size of each bank. Optionally, some timing parameters can + be optimized for faster bus access. + +maintainers: + - Masahiro Yamada + +properties: + compatible: + const: socionext,uniphier-system-bus + + reg: + maxItems: 1 + + "#address-cells": + description: | + The first cell is the bank number (chip select). + The second cell is the address offset within the bank. + const: 2 + + "#size-cells": + const: 1 + + ranges: + description: | + Provide address translation from the System Bus to the parent bus. + + Note: + The address region(s) that can be assigned for the System Bus is + implementation defined. Some SoCs can use 0x00000000-0x0fffffff and + 0x40000000-0x4fffffff, while other SoCs only 0x40000000-0x4fffffff. + There might be additional limitations depending on SoCs and the boot mode. + The address translation is arbitrary as long as the banks are assigned in + the supported address space with the required alignment and they do not + overlap one another. + + For example, it is possible to map: + bank 0 to 0x42000000-0x43ffffff, bank 5 to 0x46000000-0x46ffffff + It is also possible to map: + bank 0 to 0x48000000-0x49ffffff, bank 5 to 0x44000000-0x44ffffff + There is no reason to stick to a particular translation mapping, but the + "ranges" property should provide a "reasonable" default that is known to + work. The software should initialize the bus controller according to it. + +required: + - compatible + - reg + - "#address-cells" + - "#size-cells" + - ranges + +examples: + - | + // In this example, + // - the Ethernet device is connected at the offset 0x01f00000 of CS1 and + // mapped to 0x43f00000 of the parent bus. + // - the UART device is connected at the offset 0x00200000 of CS5 and + // mapped to 0x46200000 of the parent bus. + + system-bus { + compatible = "socionext,uniphier-system-bus"; + reg = <0x58c00000 0x400>; + #address-cells = <2>; + #size-cells = <1>; + ranges = <1 0x00000000 0x42000000 0x02000000>, + <5 0x00000000 0x46000000 0x01000000>; + + ethernet@1,01f00000 { + compatible = "smsc,lan9115"; + reg = <1 0x01f00000 0x1000>; + interrupts = <0 48 4>; + phy-mode = "mii"; + }; + + uart@5,00200000 { + compatible = "ns16550a"; + reg = <5 0x00200000 0x20>; + interrupts = <0 49 4>; + clock-frequency = <12288000>; + }; + }; diff --git a/Documentation/devicetree/bindings/bus/uniphier-system-bus.txt b/Documentation/devicetree/bindings/bus/uniphier-system-bus.txt deleted file mode 100644 index 68ef80afff16..000000000000 --- a/Documentation/devicetree/bindings/bus/uniphier-system-bus.txt +++ /dev/null @@ -1,66 +0,0 @@ -UniPhier System Bus - -The UniPhier System Bus is an external bus that connects on-board devices to -the UniPhier SoC. It is a simple (semi-)parallel bus with address, data, and -some control signals. It supports up to 8 banks (chip selects). - -Before any access to the bus, the bus controller must be configured; the bus -controller registers provide the control for the translation from the offset -within each bank to the CPU-viewed address. The needed setup includes the base -address, the size of each bank. Optionally, some timing parameters can be -optimized for faster bus access. - -Required properties: -- compatible: should be "socionext,uniphier-system-bus". -- reg: offset and length of the register set for the bus controller device. -- #address-cells: should be 2. The first cell is the bank number (chip select). - The second cell is the address offset within the bank. -- #size-cells: should be 1. -- ranges: should provide a proper address translation from the System Bus to - the parent bus. - -Note: -The address region(s) that can be assigned for the System Bus is implementation -defined. Some SoCs can use 0x00000000-0x0fffffff and 0x40000000-0x4fffffff, -while other SoCs can only use 0x40000000-0x4fffffff. There might be additional -limitations depending on SoCs and the boot mode. The address translation is -arbitrary as long as the banks are assigned in the supported address space with -the required alignment and they do not overlap one another. -For example, it is possible to map: - bank 0 to 0x42000000-0x43ffffff, bank 5 to 0x46000000-0x46ffffff -It is also possible to map: - bank 0 to 0x48000000-0x49ffffff, bank 5 to 0x44000000-0x44ffffff -There is no reason to stick to a particular translation mapping, but the -"ranges" property should provide a "reasonable" default that is known to work. -The software should initialize the bus controller according to it. - -Example: - - system-bus { - compatible = "socionext,uniphier-system-bus"; - reg = <0x58c00000 0x400>; - #address-cells = <2>; - #size-cells = <1>; - ranges = <1 0x00000000 0x42000000 0x02000000 - 5 0x00000000 0x46000000 0x01000000>; - - ethernet@1,01f00000 { - compatible = "smsc,lan9115"; - reg = <1 0x01f00000 0x1000>; - interrupts = <0 48 4> - phy-mode = "mii"; - }; - - uart@5,00200000 { - compatible = "ns16550a"; - reg = <5 0x00200000 0x20>; - interrupts = <0 49 4> - clock-frequency = <12288000>; - }; - }; - -In this example, - - the Ethernet device is connected at the offset 0x01f00000 of CS1 and - mapped to 0x43f00000 of the parent bus. - - the UART device is connected at the offset 0x00200000 of CS5 and - mapped to 0x46200000 of the parent bus. -- cgit v1.2.3 From 6bb984a3cea02b76d54f2b4bcad7ef0cdca52bc6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 22 Feb 2020 20:04:35 +0900 Subject: dt-bindings: interrupt-controller: Convert UniPhier AIDET to json-schema Convert the UniPhier AIDET (ARM Interrupt Detector) binding to DT schema format. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../socionext,uniphier-aidet.txt | 32 ------------ .../socionext,uniphier-aidet.yaml | 61 ++++++++++++++++++++++ 2 files changed, 61 insertions(+), 32 deletions(-) delete mode 100644 Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.txt create mode 100644 Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.txt b/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.txt deleted file mode 100644 index 48e71d3ac2ad..000000000000 --- a/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.txt +++ /dev/null @@ -1,32 +0,0 @@ -UniPhier AIDET - -UniPhier AIDET (ARM Interrupt Detector) is an add-on block for ARM GIC (Generic -Interrupt Controller). GIC itself can handle only high level and rising edge -interrupts. The AIDET provides logic inverter to support low level and falling -edge interrupts. - -Required properties: -- compatible: Should be one of the following: - "socionext,uniphier-ld4-aidet" - for LD4 SoC - "socionext,uniphier-pro4-aidet" - for Pro4 SoC - "socionext,uniphier-sld8-aidet" - for sLD8 SoC - "socionext,uniphier-pro5-aidet" - for Pro5 SoC - "socionext,uniphier-pxs2-aidet" - for PXs2/LD6b SoC - "socionext,uniphier-ld11-aidet" - for LD11 SoC - "socionext,uniphier-ld20-aidet" - for LD20 SoC - "socionext,uniphier-pxs3-aidet" - for PXs3 SoC -- reg: Specifies offset and length of the register set for the device. -- interrupt-controller: Identifies the node as an interrupt controller -- #interrupt-cells : Specifies the number of cells needed to encode an interrupt - source. The value should be 2. The first cell defines the interrupt number - (corresponds to the SPI interrupt number of GIC). The second cell specifies - the trigger type as defined in interrupts.txt in this directory. - -Example: - - aidet: aidet@5fc20000 { - compatible = "socionext,uniphier-pro4-aidet"; - reg = <0x5fc20000 0x200>; - interrupt-controller; - #interrupt-cells = <2>; - }; diff --git a/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml b/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml new file mode 100644 index 000000000000..f89ebde76dab --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/socionext,uniphier-aidet.yaml @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/socionext,uniphier-aidet.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: UniPhier AIDET + +description: | + UniPhier AIDET (ARM Interrupt Detector) is an add-on block for ARM GIC + (Generic Interrupt Controller). GIC itself can handle only high level and + rising edge interrupts. The AIDET provides logic inverter to support low + level and falling edge interrupts. + +maintainers: + - Masahiro Yamada + +allOf: + - $ref: /schemas/interrupt-controller.yaml# + +properties: + compatible: + enum: + - socionext,uniphier-ld4-aidet + - socionext,uniphier-pro4-aidet + - socionext,uniphier-sld8-aidet + - socionext,uniphier-pro5-aidet + - socionext,uniphier-pxs2-aidet + - socionext,uniphier-ld6b-aidet + - socionext,uniphier-ld11-aidet + - socionext,uniphier-ld20-aidet + - socionext,uniphier-pxs3-aidet + + reg: + maxItems: 1 + + interrupt-controller: true + + '#interrupt-cells': + description: | + The first cell defines the interrupt number (corresponds to the SPI + interrupt number of GIC). The second cell specifies the trigger type as + defined in interrupts.txt in this directory. + const: 2 + +required: + - compatible + - reg + - interrupt-controller + - '#interrupt-cells' + +additionalProperties: false + +examples: + - | + interrupt-controller@5fc20000 { + compatible = "socionext,uniphier-pro4-aidet"; + reg = <0x5fc20000 0x200>; + interrupt-controller; + #interrupt-cells = <2>; + }; -- cgit v1.2.3 From 7ebec90559ff772783208c88510632d0ced90da6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 22 Feb 2020 20:20:42 +0900 Subject: dt-bindings: dma: Convert UniPhier MIO DMA controller to json-schema Convert the UniPhier MIO (Media I/O) DMA controller binding to DT schema format. While I was here, I added the resets property. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../bindings/dma/socionext,uniphier-mio-dmac.yaml | 63 ++++++++++++++++++++++ .../devicetree/bindings/dma/uniphier-mio-dmac.txt | 25 --------- 2 files changed, 63 insertions(+), 25 deletions(-) create mode 100644 Documentation/devicetree/bindings/dma/socionext,uniphier-mio-dmac.yaml delete mode 100644 Documentation/devicetree/bindings/dma/uniphier-mio-dmac.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/socionext,uniphier-mio-dmac.yaml b/Documentation/devicetree/bindings/dma/socionext,uniphier-mio-dmac.yaml new file mode 100644 index 000000000000..e7bf6dd7da29 --- /dev/null +++ b/Documentation/devicetree/bindings/dma/socionext,uniphier-mio-dmac.yaml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/socionext,uniphier-mio-dmac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: UniPhier Media IO DMA controller + +description: | + This works as an external DMA engine for SD/eMMC controllers etc. + found in UniPhier LD4, Pro4, sLD8 SoCs. + +maintainers: + - Masahiro Yamada + +allOf: + - $ref: "dma-controller.yaml#" + +properties: + compatible: + const: socionext,uniphier-mio-dmac + + reg: + maxItems: 1 + + interrupts: + description: | + A list of interrupt specifiers associated with the DMA channels. + The number of interrupt lines is SoC-dependent. + + clocks: + maxItems: 1 + + resets: + maxItems: 1 + + '#dma-cells': + description: The single cell represents the channel index. + const: 1 + +required: + - compatible + - reg + - interrupts + - clocks + - '#dma-cells' + +additionalProperties: false + +examples: + - | + // In the example below, "interrupts = <0 68 4>, <0 68 4>, ..." is not a + // typo. The first two channels share a single interrupt line. + + dmac: dma-controller@5a000000 { + compatible = "socionext,uniphier-mio-dmac"; + reg = <0x5a000000 0x1000>; + interrupts = <0 68 4>, <0 68 4>, <0 69 4>, <0 70 4>, + <0 71 4>, <0 72 4>, <0 73 4>, <0 74 4>; + clocks = <&mio_clk 7>; + resets = <&mio_rst 7>; + #dma-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/dma/uniphier-mio-dmac.txt b/Documentation/devicetree/bindings/dma/uniphier-mio-dmac.txt deleted file mode 100644 index b12388dc7eac..000000000000 --- a/Documentation/devicetree/bindings/dma/uniphier-mio-dmac.txt +++ /dev/null @@ -1,25 +0,0 @@ -UniPhier Media IO DMA controller - -This works as an external DMA engine for SD/eMMC controllers etc. -found in UniPhier LD4, Pro4, sLD8 SoCs. - -Required properties: -- compatible: should be "socionext,uniphier-mio-dmac". -- reg: offset and length of the register set for the device. -- interrupts: a list of interrupt specifiers associated with the DMA channels. -- clocks: a single clock specifier. -- #dma-cells: should be <1>. The single cell represents the channel index. - -Example: - dmac: dma-controller@5a000000 { - compatible = "socionext,uniphier-mio-dmac"; - reg = <0x5a000000 0x1000>; - interrupts = <0 68 4>, <0 68 4>, <0 69 4>, <0 70 4>, - <0 71 4>, <0 72 4>, <0 73 4>, <0 74 4>; - clocks = <&mio_clk 7>; - #dma-cells = <1>; - }; - -Note: -In the example above, "interrupts = <0 68 4>, <0 68 4>, ..." is not a typo. -The first two channels share a single interrupt line. -- cgit v1.2.3 From 8dbdf23ae30af3008428dec1d199d7646e13e70d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 22 Feb 2020 20:25:41 +0900 Subject: dt-bindings: mmc: Convert UniPhier SD controller to json-schema Convert the UniPhier SD controller binding to DT schema format. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../bindings/mmc/socionext,uniphier-sd.yaml | 99 ++++++++++++++++++++++ .../devicetree/bindings/mmc/uniphier-sd.txt | 55 ------------ 2 files changed, 99 insertions(+), 55 deletions(-) create mode 100644 Documentation/devicetree/bindings/mmc/socionext,uniphier-sd.yaml delete mode 100644 Documentation/devicetree/bindings/mmc/uniphier-sd.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/mmc/socionext,uniphier-sd.yaml b/Documentation/devicetree/bindings/mmc/socionext,uniphier-sd.yaml new file mode 100644 index 000000000000..cdfac9b4411b --- /dev/null +++ b/Documentation/devicetree/bindings/mmc/socionext,uniphier-sd.yaml @@ -0,0 +1,99 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mmc/socionext,uniphier-sd.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: UniPhier SD/SDIO/eMMC controller + +maintainers: + - Masahiro Yamada + +properties: + compatible: + description: version 2.91, 3.1, 3.1.1, respectively + enum: + - socionext,uniphier-sd-v2.91 + - socionext,uniphier-sd-v3.1 + - socionext,uniphier-sd-v3.1.1 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + reset-names: + description: | + There are three reset signals at maximum + host: mandatory for all variants + bridge: exist only for version 2.91 + hw: optional. exist if eMMC hw reset line is available + oneOf: + - const: host + - items: + - const: host + - const: bridge + - items: + - const: host + - const: hw + - items: + - const: host + - const: bridge + - const: hw + + resets: + minItems: 1 + maxItems: 3 + +allOf: + - $ref: mmc-controller.yaml + + - if: + properties: + compatible: + contains: + const: socionext,uniphier-sd-v2.91 + then: + properties: + reset-names: + contains: + const: bridge + else: + properties: + reset-names: + not: + contains: + const: bridge + +required: + - compatible + - reg + - interrupts + - clocks + - reset-names + - resets + +examples: + - | + sd: mmc@5a400000 { + compatible = "socionext,uniphier-sd-v2.91"; + reg = <0x5a400000 0x200>; + interrupts = <0 76 4>; + pinctrl-names = "default", "uhs"; + pinctrl-0 = <&pinctrl_sd>; + pinctrl-1 = <&pinctrl_sd_uhs>; + clocks = <&mio_clk 0>; + reset-names = "host", "bridge"; + resets = <&mio_rst 0>, <&mio_rst 3>; + dma-names = "rx-tx"; + dmas = <&dmac 4>; + bus-width = <4>; + cap-sd-highspeed; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + }; diff --git a/Documentation/devicetree/bindings/mmc/uniphier-sd.txt b/Documentation/devicetree/bindings/mmc/uniphier-sd.txt deleted file mode 100644 index e1d658755722..000000000000 --- a/Documentation/devicetree/bindings/mmc/uniphier-sd.txt +++ /dev/null @@ -1,55 +0,0 @@ -UniPhier SD/eMMC controller - -Required properties: -- compatible: should be one of the following: - "socionext,uniphier-sd-v2.91" - IP version 2.91 - "socionext,uniphier-sd-v3.1" - IP version 3.1 - "socionext,uniphier-sd-v3.1.1" - IP version 3.1.1 -- reg: offset and length of the register set for the device. -- interrupts: a single interrupt specifier. -- clocks: a single clock specifier of the controller clock. -- reset-names: should contain the following: - "host" - mandatory for all versions - "bridge" - should exist only for "socionext,uniphier-sd-v2.91" - "hw" - should exist if eMMC hw reset line is available -- resets: a list of reset specifiers, corresponding to the reset-names - -Optional properties: -- pinctrl-names: if present, should contain the following: - "default" - should exist for all instances - "uhs" - should exist for SD instance with UHS support -- pinctrl-0: pin control state for the default mode -- pinctrl-1: pin control state for the UHS mode -- dma-names: should be "rx-tx" if present. - This property can exist only for "socionext,uniphier-sd-v2.91". -- dmas: a single DMA channel specifier - This property can exist only for "socionext,uniphier-sd-v2.91". -- bus-width: see mmc.txt -- cap-sd-highspeed: see mmc.txt -- cap-mmc-highspeed: see mmc.txt -- sd-uhs-sdr12: see mmc.txt -- sd-uhs-sdr25: see mmc.txt -- sd-uhs-sdr50: see mmc.txt -- cap-mmc-hw-reset: should exist if reset-names contains "hw". see mmc.txt -- non-removable: see mmc.txt - -Example: - - sd: sdhc@5a400000 { - compatible = "socionext,uniphier-sd-v2.91"; - reg = <0x5a400000 0x200>; - interrupts = <0 76 4>; - pinctrl-names = "default", "uhs"; - pinctrl-0 = <&pinctrl_sd>; - pinctrl-1 = <&pinctrl_sd_uhs>; - clocks = <&mio_clk 0>; - reset-names = "host", "bridge"; - resets = <&mio_rst 0>, <&mio_rst 3>; - dma-names = "rx-tx"; - dmas = <&dmac 4>; - bus-width = <4>; - cap-sd-highspeed; - sd-uhs-sdr12; - sd-uhs-sdr25; - sd-uhs-sdr50; - }; -- cgit v1.2.3 From fe6c2d6a80680a875a856eb174d12acea7681247 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?= Date: Wed, 19 Feb 2020 16:29:31 +0800 Subject: dt-bindings: timer: Add X1000 bindings. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the timer bindings for the X1000 Soc from Ingenic. Signed-off-by: 周琰杰 (Zhou Yanjie) Acked-by: Rob Herring Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1582100974-129559-3-git-send-email-zhouyanjie@wanyeetech.com --- Documentation/devicetree/bindings/timer/ingenic,tcu.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/timer/ingenic,tcu.txt b/Documentation/devicetree/bindings/timer/ingenic,tcu.txt index 0b63cebc5f45..91f704951845 100644 --- a/Documentation/devicetree/bindings/timer/ingenic,tcu.txt +++ b/Documentation/devicetree/bindings/timer/ingenic,tcu.txt @@ -10,6 +10,7 @@ Required properties: * ingenic,jz4740-tcu * ingenic,jz4725b-tcu * ingenic,jz4770-tcu + * ingenic,x1000-tcu followed by "simple-mfd". - reg: Should be the offset/length value corresponding to the TCU registers - clocks: List of phandle & clock specifiers for clocks external to the TCU. -- cgit v1.2.3 From c153a892f674af10f3c545e7eb97efe6c1a21c27 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Wed, 26 Feb 2020 00:10:08 -0800 Subject: dt-bindings: Add Guangdong Neweast Optoelectronics CO. LTD vendor prefix Add vendor prefix for Guangdong Neweast Optoelectronics CO. LTD Acked-by: Rob Herring Signed-off-by: Vasily Khoruzhick Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200226081011.1347245-4-anarsoul@gmail.com --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 9a0cca2fd166..f70cf5f3e1b8 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -665,6 +665,8 @@ patternProperties: description: Netron DY "^netxeon,.*": description: Shenzhen Netxeon Technology CO., LTD + "^neweast,.*": + description: Guangdong Neweast Optoelectronics CO., LTD "^nexbox,.*": description: Nexbox "^nextthing,.*": -- cgit v1.2.3 From 0c1064095926b28ef770046fad99d6385a972abb Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Wed, 26 Feb 2020 00:10:09 -0800 Subject: dt-bindings: display: simple: Add NewEast Optoelectronics WJFH116008A compatible This commit adds compatible for NewEast Optoelectronics WJFH116008A panel to panel-simple binding Reviewed-by: Laurent Pinchart Signed-off-by: Vasily Khoruzhick Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200226081011.1347245-5-anarsoul@gmail.com --- Documentation/devicetree/bindings/display/panel/panel-simple.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml index e717018d34ae..393ffc6acbba 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-simple.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-simple.yaml @@ -177,6 +177,8 @@ properties: - nec,nl4827hc19-05b # Netron-DY E231732 7.0" WSVGA TFT LCD panel - netron-dy,e231732 + # NewEast Optoelectronics CO., LTD WJFH116008A eDP TFT LCD panel + - neweast,wjfh116008a # Newhaven Display International 480 x 272 TFT LCD panel - newhaven,nhd-4.3-480272ef-atxl # NLT Technologies, Ltd. 15.6" FHD (1920x1080) LVDS TFT LCD panel -- cgit v1.2.3 From a886ed26568646cb542866207563e2ad9afcde69 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Thu, 27 Feb 2020 02:26:49 +0100 Subject: dt-bindings: arm: sunxi: Add PinePhone 1.0 and 1.1 bindings Document board compatible names for Pine64 PinePhone: - 1.0 - Developer variant - 1.1 - Braveheart variant Signed-off-by: Ondrej Jirman Reviewed-by: Rob Herring Signed-off-by: Maxime Ripard --- Documentation/devicetree/bindings/arm/sunxi.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml index 5b22b77e4bb7..abf2d97fb7ae 100644 --- a/Documentation/devicetree/bindings/arm/sunxi.yaml +++ b/Documentation/devicetree/bindings/arm/sunxi.yaml @@ -642,6 +642,16 @@ properties: - const: pine64,pinebook - const: allwinner,sun50i-a64 + - description: Pine64 PinePhone Developer Batch (1.0) + items: + - const: pine64,pinephone-1.0 + - const: allwinner,sun50i-a64 + + - description: Pine64 PinePhone Braveheart (1.1) + items: + - const: pine64,pinephone-1.1 + - const: allwinner,sun50i-a64 + - description: Pine64 PineTab items: - const: pine64,pinetab -- cgit v1.2.3 From 82ba4997129a47b540d3d6714975b252f8353468 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 22 Feb 2020 23:19:26 +0900 Subject: dt-bindings: mtd: Convert Denali NAND controller to json-schema Convert the Denali NAND controller binding to DT schema format. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../devicetree/bindings/mtd/denali,nand.yaml | 148 +++++++++++++++++++++ .../devicetree/bindings/mtd/denali-nand.txt | 61 --------- 2 files changed, 148 insertions(+), 61 deletions(-) create mode 100644 Documentation/devicetree/bindings/mtd/denali,nand.yaml delete mode 100644 Documentation/devicetree/bindings/mtd/denali-nand.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/mtd/denali,nand.yaml b/Documentation/devicetree/bindings/mtd/denali,nand.yaml new file mode 100644 index 000000000000..46e6b6726bc0 --- /dev/null +++ b/Documentation/devicetree/bindings/mtd/denali,nand.yaml @@ -0,0 +1,148 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mtd/denali,nand.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Denali NAND controller + +maintainers: + - Masahiro Yamada + +properties: + compatible: + enum: + - altr,socfpga-denali-nand + - socionext,uniphier-denali-nand-v5a + - socionext,uniphier-denali-nand-v5b + + reg-names: + description: | + There are two register regions: + nand_data: host data/command interface + denali_reg: register interface + items: + - const: nand_data + - const: denali_reg + + reg: + minItems: 2 + maxItems: 2 + + interrupts: + maxItems: 1 + + clock-names: + description: | + There are three clocks: + nand: controller core clock + nand_x: bus interface clock + ecc: ECC circuit clock + items: + - const: nand + - const: nand_x + - const: ecc + + clocks: + minItems: 3 + maxItems: 3 + + reset-names: + description: | + There are two optional resets: + nand: controller core reset + reg: register reset + oneOf: + - items: + - const: nand + - const: reg + - const: nand + - const: reg + + resets: + minItems: 1 + maxItems: 2 + +allOf: + - $ref: nand-controller.yaml + + - if: + properties: + compatible: + contains: + const: altr,socfpga-denali-nand + then: + patternProperties: + "^nand@[a-f0-9]$": + type: object + properties: + nand-ecc-strength: + enum: + - 8 + - 15 + nand-ecc-step-size: + enum: + - 512 + + - if: + properties: + compatible: + contains: + const: socionext,uniphier-denali-nand-v5a + then: + patternProperties: + "^nand@[a-f0-9]$": + type: object + properties: + nand-ecc-strength: + enum: + - 8 + - 16 + - 24 + nand-ecc-step-size: + enum: + - 1024 + + - if: + properties: + compatible: + contains: + const: socionext,uniphier-denali-nand-v5b + then: + patternProperties: + "^nand@[a-f0-9]$": + type: object + properties: + nand-ecc-strength: + enum: + - 8 + - 16 + nand-ecc-step-size: + enum: + - 1024 + +required: + - compatible + - reg + - interrupts + - clock-names + - clocks + +examples: + - | + nand-controller@ff900000 { + compatible = "altr,socfpga-denali-nand"; + reg-names = "nand_data", "denali_reg"; + reg = <0xff900000 0x20>, <0xffb80000 0x1000>; + interrupts = <0 144 4>; + clock-names = "nand", "nand_x", "ecc"; + clocks = <&nand_clk>, <&nand_x_clk>, <&nand_ecc_clk>; + reset-names = "nand", "reg"; + resets = <&nand_rst>, <&nand_reg_rst>; + #address-cells = <1>; + #size-cells = <0>; + + nand@0 { + reg = <0>; + }; + }; diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt b/Documentation/devicetree/bindings/mtd/denali-nand.txt deleted file mode 100644 index 98916a84bbf6..000000000000 --- a/Documentation/devicetree/bindings/mtd/denali-nand.txt +++ /dev/null @@ -1,61 +0,0 @@ -* Denali NAND controller - -Required properties: - - compatible : should be one of the following: - "altr,socfpga-denali-nand" - for Altera SOCFPGA - "socionext,uniphier-denali-nand-v5a" - for Socionext UniPhier (v5a) - "socionext,uniphier-denali-nand-v5b" - for Socionext UniPhier (v5b) - - reg : should contain registers location and length for data and reg. - - reg-names: Should contain the reg names "nand_data" and "denali_reg" - - #address-cells: should be 1. The cell encodes the chip select connection. - - #size-cells : should be 0. - - interrupts : The interrupt number. - - clocks: should contain phandle of the controller core clock, the bus - interface clock, and the ECC circuit clock. - - clock-names: should contain "nand", "nand_x", "ecc" - -Optional properties: - - resets: may contain phandles to the controller core reset, the register - reset - - reset-names: may contain "nand", "reg" - -Sub-nodes: - Sub-nodes represent available NAND chips. - - Required properties: - - reg: should contain the bank ID of the controller to which each chip - select is connected. - - Optional properties: - - nand-ecc-step-size: see nand-controller.yaml for details. - If present, the value must be - 512 for "altr,socfpga-denali-nand" - 1024 for "socionext,uniphier-denali-nand-v5a" - 1024 for "socionext,uniphier-denali-nand-v5b" - - nand-ecc-strength: see nand-controller.yaml for details. Valid values are: - 8, 15 for "altr,socfpga-denali-nand" - 8, 16, 24 for "socionext,uniphier-denali-nand-v5a" - 8, 16 for "socionext,uniphier-denali-nand-v5b" - - nand-ecc-maximize: see nand-controller.yaml for details - -The chip nodes may optionally contain sub-nodes describing partitions of the -address space. See partition.txt for more detail. - -Examples: - -nand: nand@ff900000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "altr,socfpga-denali-nand"; - reg = <0xff900000 0x20>, <0xffb80000 0x1000>; - reg-names = "nand_data", "denali_reg"; - clocks = <&nand_clk>, <&nand_x_clk>, <&nand_ecc_clk>; - clock-names = "nand", "nand_x", "ecc"; - resets = <&nand_rst>, <&nand_reg_rst>; - reset-names = "nand", "reg"; - interrupts = <0 144 4>; - - nand@0 { - reg = <0>; - } -}; -- cgit v1.2.3 From 06efe6482251520d171bf33b841f35c8677727ad Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 22 Feb 2020 23:34:44 +0900 Subject: dt-bindings: mmc: Convert Cadence SD/SDIO/eMMC controller to json-schema Convert the Cadence SD/SDIO/eMMC host controller IP (a.k.a. SD4HC) binding to DT schema format. Socionext UniPhier ARM 64-bit SoCs are integrated with this IP. Cc: Piotr Sroka Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../devicetree/bindings/mmc/cdns,sdhci.yaml | 143 +++++++++++++++++++++ .../devicetree/bindings/mmc/sdhci-cadence.txt | 80 ------------ 2 files changed, 143 insertions(+), 80 deletions(-) create mode 100644 Documentation/devicetree/bindings/mmc/cdns,sdhci.yaml delete mode 100644 Documentation/devicetree/bindings/mmc/sdhci-cadence.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/mmc/cdns,sdhci.yaml b/Documentation/devicetree/bindings/mmc/cdns,sdhci.yaml new file mode 100644 index 000000000000..2f45dd0d04db --- /dev/null +++ b/Documentation/devicetree/bindings/mmc/cdns,sdhci.yaml @@ -0,0 +1,143 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mmc/cdns,sdhci.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Cadence SD/SDIO/eMMC Host Controller (SD4HC) + +maintainers: + - Masahiro Yamada + - Piotr Sroka + +allOf: + - $ref: mmc-controller.yaml + +properties: + compatible: + items: + - enum: + - socionext,uniphier-sd4hc + - const: cdns,sd4hc + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + # PHY DLL input delays: + # They are used to delay the data valid window, and align the window to + # sampling clock. The delay starts from 5ns (for delay parameter equal to 0) + # and it is increased by 2.5ns in each step. + + cdns,phy-input-delay-sd-highspeed: + description: Value of the delay in the input path for SD high-speed timing + allOf: + - $ref: "/schemas/types.yaml#/definitions/uint32" + - minimum: 0 + - maximum: 0x1f + + cdns,phy-input-delay-legacy: + description: Value of the delay in the input path for legacy timing + allOf: + - $ref: "/schemas/types.yaml#/definitions/uint32" + - minimum: 0 + - maximum: 0x1f + + cdns,phy-input-delay-sd-uhs-sdr12: + description: Value of the delay in the input path for SD UHS SDR12 timing + allOf: + - $ref: "/schemas/types.yaml#/definitions/uint32" + - minimum: 0 + - maximum: 0x1f + + cdns,phy-input-delay-sd-uhs-sdr25: + description: Value of the delay in the input path for SD UHS SDR25 timing + allOf: + - $ref: "/schemas/types.yaml#/definitions/uint32" + - minimum: 0 + - maximum: 0x1f + + cdns,phy-input-delay-sd-uhs-sdr50: + description: Value of the delay in the input path for SD UHS SDR50 timing + allOf: + - $ref: "/schemas/types.yaml#/definitions/uint32" + - minimum: 0 + - maximum: 0x1f + + cdns,phy-input-delay-sd-uhs-ddr50: + description: Value of the delay in the input path for SD UHS DDR50 timing + allOf: + - $ref: "/schemas/types.yaml#/definitions/uint32" + - minimum: 0 + - maximum: 0x1f + + cdns,phy-input-delay-mmc-highspeed: + description: Value of the delay in the input path for MMC high-speed timing + allOf: + - $ref: "/schemas/types.yaml#/definitions/uint32" + - minimum: 0 + - maximum: 0x1f + + cdns,phy-input-delay-mmc-ddr: + description: Value of the delay in the input path for eMMC high-speed DDR timing + allOf: + - $ref: "/schemas/types.yaml#/definitions/uint32" + - minimum: 0 + - maximum: 0x1f + + # PHY DLL clock delays: + # Each delay property represents the fraction of the clock period. + # The approximate delay value will be + # (/128)*sdmclk_clock_period. + + cdns,phy-dll-delay-sdclk: + description: | + Value of the delay introduced on the sdclk output for all modes except + HS200, HS400 and HS400_ES. + allOf: + - $ref: "/schemas/types.yaml#/definitions/uint32" + - minimum: 0 + - maximum: 0x7f + + cdns,phy-dll-delay-sdclk-hsmmc: + description: | + Value of the delay introduced on the sdclk output for HS200, HS400 and + HS400_ES speed modes. + allOf: + - $ref: "/schemas/types.yaml#/definitions/uint32" + - minimum: 0 + - maximum: 0x7f + + cdns,phy-dll-delay-strobe: + description: | + Value of the delay introduced on the dat_strobe input used in + HS400 / HS400_ES speed modes. + allOf: + - $ref: "/schemas/types.yaml#/definitions/uint32" + - minimum: 0 + - maximum: 0x7f + +required: + - compatible + - reg + - interrupts + - clocks + +examples: + - | + emmc: mmc@5a000000 { + compatible = "socionext,uniphier-sd4hc", "cdns,sd4hc"; + reg = <0x5a000000 0x400>; + interrupts = <0 78 4>; + clocks = <&clk 4>; + bus-width = <8>; + mmc-ddr-1_8v; + mmc-hs200-1_8v; + mmc-hs400-1_8v; + cdns,phy-dll-delay-sdclk = <0>; + }; diff --git a/Documentation/devicetree/bindings/mmc/sdhci-cadence.txt b/Documentation/devicetree/bindings/mmc/sdhci-cadence.txt deleted file mode 100644 index fa423c277853..000000000000 --- a/Documentation/devicetree/bindings/mmc/sdhci-cadence.txt +++ /dev/null @@ -1,80 +0,0 @@ -* Cadence SD/SDIO/eMMC Host Controller - -Required properties: -- compatible: should be one of the following: - "cdns,sd4hc" - default of the IP - "socionext,uniphier-sd4hc" - for Socionext UniPhier SoCs -- reg: offset and length of the register set for the device. -- interrupts: a single interrupt specifier. -- clocks: phandle to the input clock. - -Optional properties: -For eMMC configuration, supported speed modes are not indicated by the SDHCI -Capabilities Register. Instead, the following properties should be specified -if supported. See mmc.txt for details. -- mmc-ddr-1_8v -- mmc-ddr-1_2v -- mmc-hs200-1_8v -- mmc-hs200-1_2v -- mmc-hs400-1_8v -- mmc-hs400-1_2v - -Some PHY delays can be configured by following properties. -PHY DLL input delays: -They are used to delay the data valid window, and align the window -to sampling clock. The delay starts from 5ns (for delay parameter equal to 0) -and it is increased by 2.5ns in each step. -- cdns,phy-input-delay-sd-highspeed: - Value of the delay in the input path for SD high-speed timing - Valid range = [0:0x1F]. -- cdns,phy-input-delay-legacy: - Value of the delay in the input path for legacy timing - Valid range = [0:0x1F]. -- cdns,phy-input-delay-sd-uhs-sdr12: - Value of the delay in the input path for SD UHS SDR12 timing - Valid range = [0:0x1F]. -- cdns,phy-input-delay-sd-uhs-sdr25: - Value of the delay in the input path for SD UHS SDR25 timing - Valid range = [0:0x1F]. -- cdns,phy-input-delay-sd-uhs-sdr50: - Value of the delay in the input path for SD UHS SDR50 timing - Valid range = [0:0x1F]. -- cdns,phy-input-delay-sd-uhs-ddr50: - Value of the delay in the input path for SD UHS DDR50 timing - Valid range = [0:0x1F]. -- cdns,phy-input-delay-mmc-highspeed: - Value of the delay in the input path for MMC high-speed timing - Valid range = [0:0x1F]. -- cdns,phy-input-delay-mmc-ddr: - Value of the delay in the input path for eMMC high-speed DDR timing - Valid range = [0:0x1F]. - -PHY DLL clock delays: -Each delay property represents the fraction of the clock period. -The approximate delay value will be -(/128)*sdmclk_clock_period. -- cdns,phy-dll-delay-sdclk: - Value of the delay introduced on the sdclk output - for all modes except HS200, HS400 and HS400_ES. - Valid range = [0:0x7F]. -- cdns,phy-dll-delay-sdclk-hsmmc: - Value of the delay introduced on the sdclk output - for HS200, HS400 and HS400_ES speed modes. - Valid range = [0:0x7F]. -- cdns,phy-dll-delay-strobe: - Value of the delay introduced on the dat_strobe input - used in HS400 / HS400_ES speed modes. - Valid range = [0:0x7F]. - -Example: - emmc: sdhci@5a000000 { - compatible = "socionext,uniphier-sd4hc", "cdns,sd4hc"; - reg = <0x5a000000 0x400>; - interrupts = <0 78 4>; - clocks = <&clk 4>; - bus-width = <8>; - mmc-ddr-1_8v; - mmc-hs200-1_8v; - mmc-hs400-1_8v; - cdns,phy-dll-delay-sdclk = <0>; - }; -- cgit v1.2.3 From 73c3d1da3519defab71a7aa62963d0af72cd590f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Sat, 22 Feb 2020 20:00:59 +0000 Subject: dt-bindings: rng: Convert BCM2835 to DT schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert BCM2835/6368 Random number generator bindings to DT schema. Signed-off-by: Nícolas F. R. A. Prado Signed-off-by: Rob Herring --- .../devicetree/bindings/rng/brcm,bcm2835.txt | 40 --------------- .../devicetree/bindings/rng/brcm,bcm2835.yaml | 59 ++++++++++++++++++++++ 2 files changed, 59 insertions(+), 40 deletions(-) delete mode 100644 Documentation/devicetree/bindings/rng/brcm,bcm2835.txt create mode 100644 Documentation/devicetree/bindings/rng/brcm,bcm2835.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt deleted file mode 100644 index aaac7975f61c..000000000000 --- a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt +++ /dev/null @@ -1,40 +0,0 @@ -BCM2835/6368 Random number generator - -Required properties: - -- compatible : should be one of - "brcm,bcm2835-rng" - "brcm,bcm-nsp-rng" - "brcm,bcm5301x-rng" or - "brcm,bcm6368-rng" -- reg : Specifies base physical address and size of the registers. - -Optional properties: - -- clocks : phandle to clock-controller plus clock-specifier pair -- clock-names : "ipsec" as a clock name - -Optional properties: - -- interrupts: specify the interrupt for the RNG block - -Example: - -rng { - compatible = "brcm,bcm2835-rng"; - reg = <0x7e104000 0x10>; - interrupts = <2 29>; -}; - -rng@18033000 { - compatible = "brcm,bcm-nsp-rng"; - reg = <0x18033000 0x14>; -}; - -random: rng@10004180 { - compatible = "brcm,bcm6368-rng"; - reg = <0x10004180 0x14>; - - clocks = <&periph_clk 18>; - clock-names = "ipsec"; -}; diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm2835.yaml b/Documentation/devicetree/bindings/rng/brcm,bcm2835.yaml new file mode 100644 index 000000000000..42d9a38e4e1a --- /dev/null +++ b/Documentation/devicetree/bindings/rng/brcm,bcm2835.yaml @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/rng/brcm,bcm2835.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: BCM2835/6368 Random number generator + +maintainers: + - Stefan Wahren + - Florian Fainelli + - Herbert Xu + +properties: + compatible: + enum: + - brcm,bcm2835-rng + - brcm,bcm-nsp-rng + - brcm,bcm5301x-rng + - brcm,bcm6368-rng + + reg: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + const: ipsec + + interrupts: + maxItems: 1 + +required: + - compatible + - reg + +examples: + - | + rng { + compatible = "brcm,bcm2835-rng"; + reg = <0x7e104000 0x10>; + interrupts = <2 29>; + }; + + - | + rng@18033000 { + compatible = "brcm,bcm-nsp-rng"; + reg = <0x18033000 0x14>; + }; + + - | + rng@10004180 { + compatible = "brcm,bcm6368-rng"; + reg = <0x10004180 0x14>; + + clocks = <&periph_clk 18>; + clock-names = "ipsec"; + }; -- cgit v1.2.3 From 689e16ed2b1e559eb84e16651261e5bcbf13d7e3 Mon Sep 17 00:00:00 2001 From: Ondrej Jirman Date: Sun, 23 Feb 2020 04:16:12 +0100 Subject: dt-bindings: vendor-prefixes: Add prefix for PocketBook International SA Call it "pocketbook". Signed-off-by: Ondrej Jirman Acked-by: Maxime Ripard Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index a97601e0e537..fcdda587def5 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -767,6 +767,8 @@ patternProperties: description: Broadcom Corporation (formerly PLX Technology) "^pni,.*": description: PNI Sensor Corporation + "^pocketbook,.*": + description: PocketBook International SA "^polaroid,.*": description: Polaroid Corporation "^portwell,.*": -- cgit v1.2.3 From dc8cb9df2b863c71027c6fee8ce370775d739fc3 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 13 Feb 2020 16:38:21 -0500 Subject: doc: Add some more RCU list patterns in the kernel - Add more information about RCU list patterns taking examples from audit subsystem in the linux kernel. - Keep the current audit examples, even though the kernel has changed. - Modify inline text for better passage quality. - Fix typo in code-blocks and improve code comments. - Add text formatting (italics, bold and code) for better emphasis. Patch originally submitted at https://lore.kernel.org/patchwork/patch/1082804/ Co-developed-by: Amol Grover Signed-off-by: Amol Grover Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- Documentation/RCU/listRCU.rst | 275 ++++++++++++++++++++++++++++++++---------- 1 file changed, 211 insertions(+), 64 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/listRCU.rst b/Documentation/RCU/listRCU.rst index 7956ff33042b..55d2b30db481 100644 --- a/Documentation/RCU/listRCU.rst +++ b/Documentation/RCU/listRCU.rst @@ -4,12 +4,61 @@ Using RCU to Protect Read-Mostly Linked Lists ============================================= One of the best applications of RCU is to protect read-mostly linked lists -("struct list_head" in list.h). One big advantage of this approach +(``struct list_head`` in list.h). One big advantage of this approach is that all of the required memory barriers are included for you in the list macros. This document describes several applications of RCU, with the best fits first. -Example 1: Read-Side Action Taken Outside of Lock, No In-Place Updates + +Example 1: Read-mostly list: Deferred Destruction +------------------------------------------------- + +A widely used usecase for RCU lists in the kernel is lockless iteration over +all processes in the system. ``task_struct::tasks`` represents the list node that +links all the processes. The list can be traversed in parallel to any list +additions or removals. + +The traversal of the list is done using ``for_each_process()`` which is defined +by the 2 macros:: + + #define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) + + #define for_each_process(p) \ + for (p = &init_task ; (p = next_task(p)) != &init_task ; ) + +The code traversing the list of all processes typically looks like:: + + rcu_read_lock(); + for_each_process(p) { + /* Do something with p */ + } + rcu_read_unlock(); + +The simplified code for removing a process from a task list is:: + + void release_task(struct task_struct *p) + { + write_lock(&tasklist_lock); + list_del_rcu(&p->tasks); + write_unlock(&tasklist_lock); + call_rcu(&p->rcu, delayed_put_task_struct); + } + +When a process exits, ``release_task()`` calls ``list_del_rcu(&p->tasks)`` under +``tasklist_lock`` writer lock protection, to remove the task from the list of +all tasks. The ``tasklist_lock`` prevents concurrent list additions/removals +from corrupting the list. Readers using ``for_each_process()`` are not protected +with the ``tasklist_lock``. To prevent readers from noticing changes in the list +pointers, the ``task_struct`` object is freed only after one or more grace +periods elapse (with the help of call_rcu()). This deferring of destruction +ensures that any readers traversing the list will see valid ``p->tasks.next`` +pointers and deletion/freeing can happen in parallel with traversal of the list. +This pattern is also called an **existence lock**, since RCU pins the object in +memory until all existing readers finish. + + +Example 2: Read-Side Action Taken Outside of Lock: No In-Place Updates ---------------------------------------------------------------------- The best applications are cases where, if reader-writer locking were @@ -26,7 +75,7 @@ added or deleted, rather than being modified in place. A straightforward example of this use of RCU may be found in the system-call auditing support. For example, a reader-writer locked -implementation of audit_filter_task() might be as follows:: +implementation of ``audit_filter_task()`` might be as follows:: static enum audit_state audit_filter_task(struct task_struct *tsk) { @@ -34,7 +83,7 @@ implementation of audit_filter_task() might be as follows:: enum audit_state state; read_lock(&auditsc_lock); - /* Note: audit_netlink_sem held by caller. */ + /* Note: audit_filter_mutex held by caller. */ list_for_each_entry(e, &audit_tsklist, list) { if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { read_unlock(&auditsc_lock); @@ -58,7 +107,7 @@ This means that RCU can be easily applied to the read side, as follows:: enum audit_state state; rcu_read_lock(); - /* Note: audit_netlink_sem held by caller. */ + /* Note: audit_filter_mutex held by caller. */ list_for_each_entry_rcu(e, &audit_tsklist, list) { if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { rcu_read_unlock(); @@ -69,18 +118,18 @@ This means that RCU can be easily applied to the read side, as follows:: return AUDIT_BUILD_CONTEXT; } -The read_lock() and read_unlock() calls have become rcu_read_lock() +The ``read_lock()`` and ``read_unlock()`` calls have become rcu_read_lock() and rcu_read_unlock(), respectively, and the list_for_each_entry() has -become list_for_each_entry_rcu(). The _rcu() list-traversal primitives +become list_for_each_entry_rcu(). The **_rcu()** list-traversal primitives insert the read-side memory barriers that are required on DEC Alpha CPUs. -The changes to the update side are also straightforward. A reader-writer -lock might be used as follows for deletion and insertion:: +The changes to the update side are also straightforward. A reader-writer lock +might be used as follows for deletion and insertion:: static inline int audit_del_rule(struct audit_rule *rule, struct list_head *list) { - struct audit_entry *e; + struct audit_entry *e; write_lock(&auditsc_lock); list_for_each_entry(e, list, list) { @@ -113,9 +162,9 @@ Following are the RCU equivalents for these two functions:: static inline int audit_del_rule(struct audit_rule *rule, struct list_head *list) { - struct audit_entry *e; + struct audit_entry *e; - /* Do not use the _rcu iterator here, since this is the only + /* No need to use the _rcu iterator here, since this is the only * deletion routine. */ list_for_each_entry(e, list, list) { if (!audit_compare_rule(rule, &e->rule)) { @@ -139,41 +188,41 @@ Following are the RCU equivalents for these two functions:: return 0; } -Normally, the write_lock() and write_unlock() would be replaced by -a spin_lock() and a spin_unlock(), but in this case, all callers hold -audit_netlink_sem, so no additional locking is required. The auditsc_lock -can therefore be eliminated, since use of RCU eliminates the need for -writers to exclude readers. Normally, the write_lock() calls would -be converted into spin_lock() calls. +Normally, the ``write_lock()`` and ``write_unlock()`` would be replaced by a +spin_lock() and a spin_unlock(). But in this case, all callers hold +``audit_filter_mutex``, so no additional locking is required. The +``auditsc_lock`` can therefore be eliminated, since use of RCU eliminates the +need for writers to exclude readers. The list_del(), list_add(), and list_add_tail() primitives have been replaced by list_del_rcu(), list_add_rcu(), and list_add_tail_rcu(). -The _rcu() list-manipulation primitives add memory barriers that are -needed on weakly ordered CPUs (most of them!). The list_del_rcu() -primitive omits the pointer poisoning debug-assist code that would -otherwise cause concurrent readers to fail spectacularly. +The **_rcu()** list-manipulation primitives add memory barriers that are needed on +weakly ordered CPUs (most of them!). The list_del_rcu() primitive omits the +pointer poisoning debug-assist code that would otherwise cause concurrent +readers to fail spectacularly. -So, when readers can tolerate stale data and when entries are either added -or deleted, without in-place modification, it is very easy to use RCU! +So, when readers can tolerate stale data and when entries are either added or +deleted, without in-place modification, it is very easy to use RCU! -Example 2: Handling In-Place Updates + +Example 3: Handling In-Place Updates ------------------------------------ -The system-call auditing code does not update auditing rules in place. -However, if it did, reader-writer-locked code to do so might look as -follows (presumably, the field_count is only permitted to decrease, -otherwise, the added fields would need to be filled in):: +The system-call auditing code does not update auditing rules in place. However, +if it did, the reader-writer-locked code to do so might look as follows +(assuming only ``field_count`` is updated, otherwise, the added fields would +need to be filled in):: static inline int audit_upd_rule(struct audit_rule *rule, struct list_head *list, __u32 newaction, __u32 newfield_count) { - struct audit_entry *e; - struct audit_newentry *ne; + struct audit_entry *e; + struct audit_entry *ne; write_lock(&auditsc_lock); - /* Note: audit_netlink_sem held by caller. */ + /* Note: audit_filter_mutex held by caller. */ list_for_each_entry(e, list, list) { if (!audit_compare_rule(rule, &e->rule)) { e->rule.action = newaction; @@ -188,16 +237,16 @@ otherwise, the added fields would need to be filled in):: The RCU version creates a copy, updates the copy, then replaces the old entry with the newly updated entry. This sequence of actions, allowing -concurrent reads while doing a copy to perform an update, is what gives -RCU ("read-copy update") its name. The RCU code is as follows:: +concurrent reads while making a copy to perform an update, is what gives +RCU (*read-copy update*) its name. The RCU code is as follows:: static inline int audit_upd_rule(struct audit_rule *rule, struct list_head *list, __u32 newaction, __u32 newfield_count) { - struct audit_entry *e; - struct audit_newentry *ne; + struct audit_entry *e; + struct audit_entry *ne; list_for_each_entry(e, list, list) { if (!audit_compare_rule(rule, &e->rule)) { @@ -215,34 +264,45 @@ RCU ("read-copy update") its name. The RCU code is as follows:: return -EFAULT; /* No matching rule */ } -Again, this assumes that the caller holds audit_netlink_sem. Normally, -the reader-writer lock would become a spinlock in this sort of code. +Again, this assumes that the caller holds ``audit_filter_mutex``. Normally, the +writer lock would become a spinlock in this sort of code. -Example 3: Eliminating Stale Data +Another use of this pattern can be found in the openswitch driver's *connection +tracking table* code in ``ct_limit_set()``. The table holds connection tracking +entries and has a limit on the maximum entries. There is one such table +per-zone and hence one *limit* per zone. The zones are mapped to their limits +through a hashtable using an RCU-managed hlist for the hash chains. When a new +limit is set, a new limit object is allocated and ``ct_limit_set()`` is called +to replace the old limit object with the new one using list_replace_rcu(). +The old limit object is then freed after a grace period using kfree_rcu(). + + +Example 4: Eliminating Stale Data --------------------------------- -The auditing examples above tolerate stale data, as do most algorithms +The auditing example above tolerates stale data, as do most algorithms that are tracking external state. Because there is a delay from the time the external state changes before Linux becomes aware of the change, -additional RCU-induced staleness is normally not a problem. +additional RCU-induced staleness is generally not a problem. However, there are many examples where stale data cannot be tolerated. One example in the Linux kernel is the System V IPC (see the ipc_lock() -function in ipc/util.c). This code checks a "deleted" flag under a -per-entry spinlock, and, if the "deleted" flag is set, pretends that the +function in ipc/util.c). This code checks a *deleted* flag under a +per-entry spinlock, and, if the *deleted* flag is set, pretends that the entry does not exist. For this to be helpful, the search function must -return holding the per-entry spinlock, as ipc_lock() does in fact do. +return holding the per-entry lock, as ipc_lock() does in fact do. + +.. _quick_quiz: Quick Quiz: - Why does the search function need to return holding the per-entry lock for - this deleted-flag technique to be helpful? + For the deleted-flag technique to be helpful, why is it necessary + to hold the per-entry lock while returning from the search function? -:ref:`Answer to Quick Quiz ` +:ref:`Answer to Quick Quiz ` -If the system-call audit module were to ever need to reject stale data, -one way to accomplish this would be to add a "deleted" flag and a "lock" -spinlock to the audit_entry structure, and modify audit_filter_task() -as follows:: +If the system-call audit module were to ever need to reject stale data, one way +to accomplish this would be to add a ``deleted`` flag and a ``lock`` spinlock to the +audit_entry structure, and modify ``audit_filter_task()`` as follows:: static enum audit_state audit_filter_task(struct task_struct *tsk) { @@ -267,20 +327,20 @@ as follows:: } Note that this example assumes that entries are only added and deleted. -Additional mechanism is required to deal correctly with the -update-in-place performed by audit_upd_rule(). For one thing, -audit_upd_rule() would need additional memory barriers to ensure -that the list_add_rcu() was really executed before the list_del_rcu(). +Additional mechanism is required to deal correctly with the update-in-place +performed by ``audit_upd_rule()``. For one thing, ``audit_upd_rule()`` would +need additional memory barriers to ensure that the list_add_rcu() was really +executed before the list_del_rcu(). -The audit_del_rule() function would need to set the "deleted" -flag under the spinlock as follows:: +The ``audit_del_rule()`` function would need to set the ``deleted`` flag under the +spinlock as follows:: static inline int audit_del_rule(struct audit_rule *rule, struct list_head *list) { - struct audit_entry *e; + struct audit_entry *e; - /* Do not need to use the _rcu iterator here, since this + /* No need to use the _rcu iterator here, since this * is the only deletion routine. */ list_for_each_entry(e, list, list) { if (!audit_compare_rule(rule, &e->rule)) { @@ -295,6 +355,91 @@ flag under the spinlock as follows:: return -EFAULT; /* No matching rule */ } +This too assumes that the caller holds ``audit_filter_mutex``. + + +Example 5: Skipping Stale Objects +--------------------------------- + +For some usecases, reader performance can be improved by skipping stale objects +during read-side list traversal if the object in concern is pending destruction +after one or more grace periods. One such example can be found in the timerfd +subsystem. When a ``CLOCK_REALTIME`` clock is reprogrammed - for example due to +setting of the system time, then all programmed timerfds that depend on this +clock get triggered and processes waiting on them to expire are woken up in +advance of their scheduled expiry. To facilitate this, all such timers are added +to an RCU-managed ``cancel_list`` when they are setup in +``timerfd_setup_cancel()``:: + + static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) + { + spin_lock(&ctx->cancel_lock); + if ((ctx->clockid == CLOCK_REALTIME && + (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { + if (!ctx->might_cancel) { + ctx->might_cancel = true; + spin_lock(&cancel_lock); + list_add_rcu(&ctx->clist, &cancel_list); + spin_unlock(&cancel_lock); + } + } + spin_unlock(&ctx->cancel_lock); + } + +When a timerfd is freed (fd is closed), then the ``might_cancel`` flag of the +timerfd object is cleared, the object removed from the ``cancel_list`` and +destroyed:: + + int timerfd_release(struct inode *inode, struct file *file) + { + struct timerfd_ctx *ctx = file->private_data; + + spin_lock(&ctx->cancel_lock); + if (ctx->might_cancel) { + ctx->might_cancel = false; + spin_lock(&cancel_lock); + list_del_rcu(&ctx->clist); + spin_unlock(&cancel_lock); + } + spin_unlock(&ctx->cancel_lock); + + hrtimer_cancel(&ctx->t.tmr); + kfree_rcu(ctx, rcu); + return 0; + } + +If the ``CLOCK_REALTIME`` clock is set, for example by a time server, the +hrtimer framework calls ``timerfd_clock_was_set()`` which walks the +``cancel_list`` and wakes up processes waiting on the timerfd. While iterating +the ``cancel_list``, the ``might_cancel`` flag is consulted to skip stale +objects:: + + void timerfd_clock_was_set(void) + { + struct timerfd_ctx *ctx; + unsigned long flags; + + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &cancel_list, clist) { + if (!ctx->might_cancel) + continue; + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ctx->moffs != ktime_mono_to_real(0)) { + ctx->moffs = KTIME_MAX; + ctx->ticks++; + wake_up_locked_poll(&ctx->wqh, EPOLLIN); + } + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + } + rcu_read_unlock(); + } + +The key point here is, because RCU-traversal of the ``cancel_list`` happens +while objects are being added and removed to the list, sometimes the traversal +can step on an object that has been removed from the list. In this example, it +is seen that it is better to skip such objects using a flag. + + Summary ------- @@ -303,19 +448,21 @@ the most amenable to use of RCU. The simplest case is where entries are either added or deleted from the data structure (or atomically modified in place), but non-atomic in-place modifications can be handled by making a copy, updating the copy, then replacing the original with the copy. -If stale data cannot be tolerated, then a "deleted" flag may be used +If stale data cannot be tolerated, then a *deleted* flag may be used in conjunction with a per-entry spinlock in order to allow the search function to reject newly deleted data. -.. _answer_quick_quiz_list: +.. _quick_quiz_answer: Answer to Quick Quiz: - Why does the search function need to return holding the per-entry - lock for this deleted-flag technique to be helpful? + For the deleted-flag technique to be helpful, why is it necessary + to hold the per-entry lock while returning from the search function? If the search function drops the per-entry lock before returning, then the caller will be processing stale data in any case. If it is really OK to be processing stale data, then you don't need a - "deleted" flag. If processing stale data really is a problem, + *deleted* flag. If processing stale data really is a problem, then you need to hold the per-entry lock across all of the code that uses the value that was returned. + +:ref:`Back to Quick Quiz ` -- cgit v1.2.3 From d18c265fbf19de7716e6e3054b752594133b0739 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 6 Jan 2020 21:07:56 +0100 Subject: doc/RCU/Design: Remove remaining HTML tags in ReST files Commit ccc9971e2147 ("docs: rcu: convert some articles from html to ReST") has converted a few of html RCU docs into ReST files, but a few of html tags which not supported on rst is remaining. This commit converts those to ReST appropriate alternatives. Reviewed-by: Madhuparna Bhowmik Signed-off-by: SeongJae Park Signed-off-by: Paul E. McKenney --- .../RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst index 1a8b129cfc04..83ae3b79a643 100644 --- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst +++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst @@ -4,7 +4,7 @@ A Tour Through TREE_RCU's Grace-Period Memory Ordering August 8, 2017 -This article was contributed by Paul E. McKenney +This article was contributed by Paul E. McKenney Introduction ============ @@ -48,7 +48,7 @@ Tree RCU Grace Period Memory Ordering Building Blocks The workhorse for RCU's grace-period memory ordering is the critical section for the ``rcu_node`` structure's -``->lock``. These critical sections use helper functions for lock +``->lock``. These critical sections use helper functions for lock acquisition, including ``raw_spin_lock_rcu_node()``, ``raw_spin_lock_irq_rcu_node()``, and ``raw_spin_lock_irqsave_rcu_node()``. Their lock-release counterparts are ``raw_spin_unlock_rcu_node()``, @@ -102,9 +102,9 @@ lock-acquisition and lock-release functions:: 23 r3 = READ_ONCE(x); 24 } 25 - 26 WARN_ON(r1 == 0 && r2 == 0 && r3 == 0); + 26 WARN_ON(r1 == 0 && r2 == 0 && r3 == 0); -The ``WARN_ON()`` is evaluated at “the end of time”, +The ``WARN_ON()`` is evaluated at "the end of time", after all changes have propagated throughout the system. Without the ``smp_mb__after_unlock_lock()`` provided by the acquisition functions, this ``WARN_ON()`` could trigger, for example -- cgit v1.2.3 From c50a871409dc664f2f89d11926d9a6b8ab7f5b07 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 6 Jan 2020 21:07:57 +0100 Subject: doc/RCU/listRCU: Fix typos in a example code snippets Signed-off-by: SeongJae Park Signed-off-by: Paul E. McKenney --- Documentation/RCU/listRCU.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/listRCU.rst b/Documentation/RCU/listRCU.rst index 55d2b30db481..e768f56e8fa3 100644 --- a/Documentation/RCU/listRCU.rst +++ b/Documentation/RCU/listRCU.rst @@ -226,7 +226,7 @@ need to be filled in):: list_for_each_entry(e, list, list) { if (!audit_compare_rule(rule, &e->rule)) { e->rule.action = newaction; - e->rule.file_count = newfield_count; + e->rule.field_count = newfield_count; write_unlock(&auditsc_lock); return 0; } @@ -255,7 +255,7 @@ RCU (*read-copy update*) its name. The RCU code is as follows:: return -ENOMEM; audit_copy_rule(&ne->rule, &e->rule); ne->rule.action = newaction; - ne->rule.file_count = newfield_count; + ne->rule.field_count = newfield_count; list_replace_rcu(&e->list, &ne->list); call_rcu(&e->rcu, audit_free_rule); return 0; -- cgit v1.2.3 From 3282b0469248ab25b3f40b95e9a3d357c9d946d5 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 6 Jan 2020 21:07:58 +0100 Subject: doc/RCU/listRCU: Update example function name listRCU.rst document gives an example with 'ipc_lock()', but the function has dropped off by commit 82061c57ce93 ("ipc: drop ipc_lock()"). Because the main logic of 'ipc_lock()' has melded in 'shm_lock()' by the commit, this commit updates the document to use 'shm_lock()' instead. Reviewed-by: Madhuparna Bhowmik Signed-off-by: SeongJae Park Signed-off-by: Paul E. McKenney --- Documentation/RCU/listRCU.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/listRCU.rst b/Documentation/RCU/listRCU.rst index e768f56e8fa3..2a643e293fb4 100644 --- a/Documentation/RCU/listRCU.rst +++ b/Documentation/RCU/listRCU.rst @@ -286,11 +286,11 @@ time the external state changes before Linux becomes aware of the change, additional RCU-induced staleness is generally not a problem. However, there are many examples where stale data cannot be tolerated. -One example in the Linux kernel is the System V IPC (see the ipc_lock() -function in ipc/util.c). This code checks a *deleted* flag under a +One example in the Linux kernel is the System V IPC (see the shm_lock() +function in ipc/shm.c). This code checks a *deleted* flag under a per-entry spinlock, and, if the *deleted* flag is set, pretends that the entry does not exist. For this to be helpful, the search function must -return holding the per-entry lock, as ipc_lock() does in fact do. +return holding the per-entry spinlock, as shm_lock() does in fact do. .. _quick_quiz: -- cgit v1.2.3 From be2895681d6d8b1eb5a052d4655dea0d7cc2d84d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 6 Jan 2020 21:07:59 +0100 Subject: doc/RCU/rcu: Use ':ref:' for links to other docs Signed-off-by: SeongJae Park Signed-off-by: Paul E. McKenney --- Documentation/RCU/rcu.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/rcu.rst b/Documentation/RCU/rcu.rst index 8dfb437dacc3..a1dd71d01862 100644 --- a/Documentation/RCU/rcu.rst +++ b/Documentation/RCU/rcu.rst @@ -11,8 +11,8 @@ must be long enough that any readers accessing the item being deleted have since dropped their references. For example, an RCU-protected deletion from a linked list would first remove the item from the list, wait for a grace period to elapse, then free the element. See the -Documentation/RCU/listRCU.rst file for more information on using RCU with -linked lists. +:ref:`Documentation/RCU/listRCU.rst ` for more information on +using RCU with linked lists. Frequently Asked Questions -------------------------- @@ -50,7 +50,7 @@ Frequently Asked Questions - If I am running on a uniprocessor kernel, which can only do one thing at a time, why should I wait for a grace period? - See the Documentation/RCU/UP.rst file for more information. + See :ref:`Documentation/RCU/UP.rst ` for more information. - How can I see where RCU is currently used in the Linux kernel? @@ -68,9 +68,9 @@ Frequently Asked Questions - Why the name "RCU"? - "RCU" stands for "read-copy update". The file Documentation/RCU/listRCU.rst - has more information on where this name came from, search for - "read-copy update" to find it. + "RCU" stands for "read-copy update". + :ref:`Documentation/RCU/listRCU.rst ` has more information on where + this name came from, search for "read-copy update" to find it. - I hear that RCU is patented? What is with that? -- cgit v1.2.3 From 6a534b299ab20bdb345d3a6df0fc557c963fc50d Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 6 Jan 2020 21:08:00 +0100 Subject: doc/RCU/rcu: Use absolute paths for non-rst files Signed-off-by: SeongJae Park Signed-off-by: Paul E. McKenney --- Documentation/RCU/rcu.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/rcu.rst b/Documentation/RCU/rcu.rst index a1dd71d01862..2a830c51477e 100644 --- a/Documentation/RCU/rcu.rst +++ b/Documentation/RCU/rcu.rst @@ -75,7 +75,7 @@ Frequently Asked Questions - I hear that RCU is patented? What is with that? Yes, it is. There are several known patents related to RCU, - search for the string "Patent" in RTFP.txt to find them. + search for the string "Patent" in Documentation/RCU/RTFP.txt to find them. Of these, one was allowed to lapse by the assignee, and the others have been contributed to the Linux kernel under GPL. There are now also LGPL implementations of user-level RCU @@ -88,5 +88,5 @@ Frequently Asked Questions - Where can I find more information on RCU? - See the RTFP.txt file in this directory. + See the Documentation/RCU/RTFP.txt file. Or point your browser at (http://www.rdrop.com/users/paulmck/RCU/). -- cgit v1.2.3 From 06a649b314b3e579cfe1c50112c30e3127a36dba Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 6 Jan 2020 21:08:01 +0100 Subject: doc/RCU/rcu: Use https instead of http if possible Signed-off-by: SeongJae Park Signed-off-by: Paul E. McKenney --- Documentation/RCU/rcu.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/RCU/rcu.rst b/Documentation/RCU/rcu.rst index 2a830c51477e..0e03c6ef3147 100644 --- a/Documentation/RCU/rcu.rst +++ b/Documentation/RCU/rcu.rst @@ -79,7 +79,7 @@ Frequently Asked Questions Of these, one was allowed to lapse by the assignee, and the others have been contributed to the Linux kernel under GPL. There are now also LGPL implementations of user-level RCU - available (http://liburcu.org/). + available (https://liburcu.org/). - I hear that RCU needs work in order to support realtime kernels? -- cgit v1.2.3 From 9671f30ee25151f680d2f4345b4e4c67bed6559c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Jan 2020 11:50:05 -0800 Subject: doc: Add rcutorture scripting to torture.txt For testing mainline, the kvm.sh rcutorture script is the preferred approach to testing. This commit therefore adds it to the torture.txt documentation. Signed-off-by: Paul E. McKenney --- Documentation/RCU/torture.txt | 147 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 140 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index a41a0384d20c..af712a3c5b6a 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt @@ -124,9 +124,14 @@ using a dynamically allocated srcu_struct (hence "srcud-" rather than debugging. The final "T" entry contains the totals of the counters. -USAGE +USAGE ON SPECIFIC KERNEL BUILDS -The following script may be used to torture RCU: +It is sometimes desirable to torture RCU on a specific kernel build, +for example, when preparing to put that kernel build into production. +In that case, the kernel should be built with CONFIG_RCU_TORTURE_TEST=m +so that the test can be started using modprobe and terminated using rmmod. + +For example, the following script may be used to torture RCU: #!/bin/sh @@ -142,8 +147,136 @@ checked for such errors. The "rmmod" command forces a "SUCCESS", two are self-explanatory, while the last indicates that while there were no RCU failures, CPU-hotplug problems were detected. -However, the tools/testing/selftests/rcutorture/bin/kvm.sh script -provides better automation, including automatic failure analysis. -It assumes a qemu/kvm-enabled platform, and runs guest OSes out of initrd. -See tools/testing/selftests/rcutorture/doc/initrd.txt for instructions -on setting up such an initrd. + +USAGE ON MAINLINE KERNELS + +When using rcutorture to test changes to RCU itself, it is often +necessary to build a number of kernels in order to test that change +across a broad range of combinations of the relevant Kconfig options +and of the relevant kernel boot parameters. In this situation, use +of modprobe and rmmod can be quite time-consuming and error-prone. + +Therefore, the tools/testing/selftests/rcutorture/bin/kvm.sh +script is available for mainline testing for x86, arm64, and +powerpc. By default, it will run the series of tests specified by +tools/testing/selftests/rcutorture/configs/rcu/CFLIST, with each test +running for 30 minutes within a guest OS using a minimal userspace +supplied by an automatically generated initrd. After the tests are +complete, the resulting build products and console output are analyzed +for errors and the results of the runs are summarized. + +On larger systems, rcutorture testing can be accelerated by passing the +--cpus argument to kvm.sh. For example, on a 64-CPU system, "--cpus 43" +would use up to 43 CPUs to run tests concurrently, which as of v5.4 would +complete all the scenarios in two batches, reducing the time to complete +from about eight hours to about one hour (not counting the time to build +the sixteen kernels). The "--dryrun sched" argument will not run tests, +but rather tell you how the tests would be scheduled into batches. This +can be useful when working out how many CPUs to specify in the --cpus +argument. + +Not all changes require that all scenarios be run. For example, a change +to Tree SRCU might run only the SRCU-N and SRCU-P scenarios using the +--configs argument to kvm.sh as follows: "--configs 'SRCU-N SRCU-P'". +Large systems can run multiple copies of of the full set of scenarios, +for example, a system with 448 hardware threads can run five instances +of the full set concurrently. To make this happen: + + kvm.sh --cpus 448 --configs '5*CFLIST' + +Alternatively, such a system can run 56 concurrent instances of a single +eight-CPU scenario: + + kvm.sh --cpus 448 --configs '56*TREE04' + +Or 28 concurrent instances of each of two eight-CPU scenarios: + + kvm.sh --cpus 448 --configs '28*TREE03 28*TREE04' + +Of course, each concurrent instance will use memory, which can be +limited using the --memory argument, which defaults to 512M. Small +values for memory may require disabling the callback-flooding tests +using the --bootargs parameter discussed below. + +Sometimes additional debugging is useful, and in such cases the --kconfig +parameter to kvm.sh may be used, for example, "--kconfig 'CONFIG_KASAN=y'". + +Kernel boot arguments can also be supplied, for example, to control +rcutorture's module parameters. For example, to test a change to RCU's +CPU stall-warning code, use "--bootargs 'rcutorture.stall_cpu=30'". +This will of course result in the scripting reporting a failure, namely +the resuling RCU CPU stall warning. As noted above, reducing memory may +require disabling rcutorture's callback-flooding tests: + + kvm.sh --cpus 448 --configs '56*TREE04' --memory 128M \ + --bootargs 'rcutorture.fwd_progress=0' + +Sometimes all that is needed is a full set of kernel builds. This is +what the --buildonly argument does. + +Finally, the --trust-make argument allows each kernel build to reuse what +it can from the previous kernel build. + +There are additional more arcane arguments that are documented in the +source code of the kvm.sh script. + +If a run contains failures, the number of buildtime and runtime failures +is listed at the end of the kvm.sh output, which you really should redirect +to a file. The build products and console output of each run is kept in +tools/testing/selftests/rcutorture/res in timestamped directories. A +given directory can be supplied to kvm-find-errors.sh in order to have +it cycle you through summaries of errors and full error logs. For example: + + tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh \ + tools/testing/selftests/rcutorture/res/2020.01.20-15.54.23 + +However, it is often more convenient to access the files directly. +Files pertaining to all scenarios in a run reside in the top-level +directory (2020.01.20-15.54.23 in the example above), while per-scenario +files reside in a subdirectory named after the scenario (for example, +"TREE04"). If a given scenario ran more than once (as in "--configs +'56*TREE04'" above), the directories corresponding to the second and +subsequent runs of that scenario include a sequence number, for example, +"TREE04.2", "TREE04.3", and so on. + +The most frequently used file in the top-level directory is testid.txt. +If the test ran in a git repository, then this file contains the commit +that was tested and any uncommitted changes in diff format. + +The most frequently used files in each per-scenario-run directory are: + +.config: This file contains the Kconfig options. + +Make.out: This contains build output for a specific scenario. + +console.log: This contains the console output for a specific scenario. + This file may be examined once the kernel has booted, but + it might not exist if the build failed. + +vmlinux: This contains the kernel, which can be useful with tools like + objdump and gdb. + +A number of additional files are available, but are less frequently used. +Many are intended for debugging of rcutorture itself or of its scripting. + +As of v5.4, a successful run with the default set of scenarios produces +the following summary at the end of the run on a 12-CPU system: + +SRCU-N ------- 804233 GPs (148.932/s) [srcu: g10008272 f0x0 ] +SRCU-P ------- 202320 GPs (37.4667/s) [srcud: g1809476 f0x0 ] +SRCU-t ------- 1122086 GPs (207.794/s) [srcu: g0 f0x0 ] +SRCU-u ------- 1111285 GPs (205.794/s) [srcud: g1 f0x0 ] +TASKS01 ------- 19666 GPs (3.64185/s) [tasks: g0 f0x0 ] +TASKS02 ------- 20541 GPs (3.80389/s) [tasks: g0 f0x0 ] +TASKS03 ------- 19416 GPs (3.59556/s) [tasks: g0 f0x0 ] +TINY01 ------- 836134 GPs (154.84/s) [rcu: g0 f0x0 ] n_max_cbs: 34198 +TINY02 ------- 850371 GPs (157.476/s) [rcu: g0 f0x0 ] n_max_cbs: 2631 +TREE01 ------- 162625 GPs (30.1157/s) [rcu: g1124169 f0x0 ] +TREE02 ------- 333003 GPs (61.6672/s) [rcu: g2647753 f0x0 ] n_max_cbs: 35844 +TREE03 ------- 306623 GPs (56.782/s) [rcu: g2975325 f0x0 ] n_max_cbs: 1496497 +CPU count limited from 16 to 12 +TREE04 ------- 246149 GPs (45.5831/s) [rcu: g1695737 f0x0 ] n_max_cbs: 434961 +TREE05 ------- 314603 GPs (58.2598/s) [rcu: g2257741 f0x2 ] n_max_cbs: 193997 +TREE07 ------- 167347 GPs (30.9902/s) [rcu: g1079021 f0x0 ] n_max_cbs: 478732 +CPU count limited from 16 to 12 +TREE09 ------- 752238 GPs (139.303/s) [rcu: g13075057 f0x0 ] n_max_cbs: 99011 -- cgit v1.2.3 From 8149b5cbfa1540cd7542fd4e790a2874afbc5001 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 31 Jan 2020 21:52:37 +0100 Subject: Documentation/memory-barriers: Fix typos Signed-off-by: SeongJae Park Signed-off-by: Paul E. McKenney --- Documentation/memory-barriers.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 7146da061693..e1c355e84edd 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -185,7 +185,7 @@ As a further example, consider this sequence of events: =============== =============== { A == 1, B == 2, C == 3, P == &A, Q == &C } B = 4; Q = P; - P = &B D = *Q; + P = &B; D = *Q; There is an obvious data dependency here, as the value loaded into D depends on the address retrieved from P by CPU 2. At the end of the sequence, any of the @@ -569,7 +569,7 @@ following sequence of events: { A == 1, B == 2, C == 3, P == &A, Q == &C } B = 4; - WRITE_ONCE(P, &B) + WRITE_ONCE(P, &B); Q = READ_ONCE(P); D = *Q; @@ -1721,7 +1721,7 @@ of optimizations: and WRITE_ONCE() are more selective: With READ_ONCE() and WRITE_ONCE(), the compiler need only forget the contents of the indicated memory locations, while with barrier() the compiler must - discard the value of all memory locations that it has currented + discard the value of all memory locations that it has currently cached in any machine registers. Of course, the compiler must also respect the order in which the READ_ONCE()s and WRITE_ONCE()s occur, though the CPU of course need not do so. @@ -1833,7 +1833,7 @@ Aside: In the case of data dependencies, the compiler would be expected to issue the loads in the correct order (eg. `a[b]` would have to load the value of b before loading a[b]), however there is no guarantee in the C specification that the compiler may not speculate the value of b -(eg. is equal to 1) and load a before b (eg. tmp = a[1]; if (b != 1) +(eg. is equal to 1) and load a[b] before b (eg. tmp = a[1]; if (b != 1) tmp = a[b]; ). There is also the problem of a compiler reloading b after having loaded a[b], thus having a newer copy of b than a[b]. A consensus has not yet been reached about these problems, however the READ_ONCE() -- cgit v1.2.3 From 146b5bbf01bee29eefca152c1638c2d1cce90a28 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Tue, 17 Dec 2019 14:20:46 +0100 Subject: media: dt-bindings: media: venus: Convert msm8916 to DT schema Convert qcom,msm8916-venus Venus binding to DT schema Reviewed-by: Rob Herring Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab --- .../bindings/media/qcom,msm8916-venus.yaml | 119 +++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 Documentation/devicetree/bindings/media/qcom,msm8916-venus.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/qcom,msm8916-venus.yaml b/Documentation/devicetree/bindings/media/qcom,msm8916-venus.yaml new file mode 100644 index 000000000000..f9606df02d70 --- /dev/null +++ b/Documentation/devicetree/bindings/media/qcom,msm8916-venus.yaml @@ -0,0 +1,119 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/media/qcom,msm8916-venus.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Qualcomm Venus video encode and decode accelerators + +maintainers: + - Stanimir Varbanov + +description: | + The Venus IP is a video encode and decode accelerator present + on Qualcomm platforms + +properties: + compatible: + const: qcom,msm8916-venus + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + power-domains: + maxItems: 1 + + clocks: + maxItems: 3 + + clock-names: + items: + - const: core + - const: iface + - const: bus + + iommus: + maxItems: 1 + + memory-region: + maxItems: 1 + + video-decoder: + type: object + + properties: + compatible: + const: "venus-decoder" + + required: + - compatible + + additionalProperties: false + + video-encoder: + type: object + + properties: + compatible: + const: "venus-encoder" + + required: + - compatible + + additionalProperties: false + + video-firmware: + type: object + + description: | + Firmware subnode is needed when the platform does not + have TrustZone. + + properties: + iommus: + maxItems: 1 + + required: + - iommus + +required: + - compatible + - reg + - interrupts + - power-domains + - clocks + - clock-names + - iommus + - memory-region + - video-decoder + - video-encoder + +examples: + - | + #include + #include + + video-codec@1d00000 { + compatible = "qcom,msm8916-venus"; + reg = <0x01d00000 0xff000>; + interrupts = ; + clocks = <&gcc GCC_VENUS0_VCODEC0_CLK>, + <&gcc GCC_VENUS0_AHB_CLK>, + <&gcc GCC_VENUS0_AXI_CLK>; + clock-names = "core", "iface", "bus"; + power-domains = <&gcc VENUS_GDSC>; + iommus = <&apps_iommu 5>; + memory-region = <&venus_mem>; + + video-decoder { + compatible = "venus-decoder"; + }; + + video-encoder { + compatible = "venus-encoder"; + }; + }; -- cgit v1.2.3 From df7acc7e7ed440be1a7713baf46a15c71b5a6a77 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Tue, 17 Dec 2019 14:24:55 +0100 Subject: media: dt-bindings: media: venus: Convert msm8996 to DT schema Convert qcom,msm8996-venus Venus binding to DT schema. Reviewed-by: Rob Herring Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab --- .../bindings/media/qcom,msm8996-venus.yaml | 172 +++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 Documentation/devicetree/bindings/media/qcom,msm8996-venus.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/qcom,msm8996-venus.yaml b/Documentation/devicetree/bindings/media/qcom,msm8996-venus.yaml new file mode 100644 index 000000000000..fa0dc6c47f1d --- /dev/null +++ b/Documentation/devicetree/bindings/media/qcom,msm8996-venus.yaml @@ -0,0 +1,172 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/media/qcom,msm8996-venus.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Qualcomm Venus video encode and decode accelerators + +maintainers: + - Stanimir Varbanov + +description: | + The Venus IP is a video encode and decode accelerator present + on Qualcomm platforms + +properties: + compatible: + const: qcom,msm8996-venus + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + power-domains: + maxItems: 1 + + clocks: + maxItems: 4 + + clock-names: + items: + - const: core + - const: iface + - const: bus + - const: mbus + + iommus: + maxItems: 20 + + memory-region: + maxItems: 1 + + video-decoder: + type: object + + properties: + compatible: + const: venus-decoder + + clocks: + maxItems: 1 + + clock-names: + items: + - const: core + + power-domains: + maxItems: 1 + + required: + - compatible + - clocks + - clock-names + - power-domains + + additionalProperties: false + + video-encoder: + type: object + + properties: + compatible: + const: venus-encoder + + clocks: + maxItems: 1 + + clock-names: + items: + - const: core + + power-domains: + maxItems: 1 + + required: + - compatible + - clocks + - clock-names + - power-domains + + additionalProperties: false + + video-firmware: + type: object + + description: | + Firmware subnode is needed when the platform does not + have TrustZone. + + properties: + iommus: + maxItems: 1 + + required: + - iommus + +required: + - compatible + - reg + - interrupts + - power-domains + - clocks + - clock-names + - iommus + - memory-region + - video-decoder + - video-encoder + +examples: + - | + #include + #include + + video-codec@c00000 { + compatible = "qcom,msm8996-venus"; + reg = <0x00c00000 0xff000>; + interrupts = ; + clocks = <&mmcc VIDEO_CORE_CLK>, + <&mmcc VIDEO_AHB_CLK>, + <&mmcc VIDEO_AXI_CLK>, + <&mmcc VIDEO_MAXI_CLK>; + clock-names = "core", "iface", "bus", "mbus"; + power-domains = <&mmcc VENUS_GDSC>; + iommus = <&venus_smmu 0x00>, + <&venus_smmu 0x01>, + <&venus_smmu 0x0a>, + <&venus_smmu 0x07>, + <&venus_smmu 0x0e>, + <&venus_smmu 0x0f>, + <&venus_smmu 0x08>, + <&venus_smmu 0x09>, + <&venus_smmu 0x0b>, + <&venus_smmu 0x0c>, + <&venus_smmu 0x0d>, + <&venus_smmu 0x10>, + <&venus_smmu 0x11>, + <&venus_smmu 0x21>, + <&venus_smmu 0x28>, + <&venus_smmu 0x29>, + <&venus_smmu 0x2b>, + <&venus_smmu 0x2c>, + <&venus_smmu 0x2d>, + <&venus_smmu 0x31>; + memory-region = <&venus_mem>; + + video-decoder { + compatible = "venus-decoder"; + clocks = <&mmcc VIDEO_SUBCORE0_CLK>; + clock-names = "core"; + power-domains = <&mmcc VENUS_CORE0_GDSC>; + }; + + video-encoder { + compatible = "venus-encoder"; + clocks = <&mmcc VIDEO_SUBCORE1_CLK>; + clock-names = "core"; + power-domains = <&mmcc VENUS_CORE1_GDSC>; + }; + }; -- cgit v1.2.3 From 49a3797aa51623f9891a832eff9dea89fb00c1c1 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Tue, 17 Dec 2019 14:25:34 +0100 Subject: media: dt-bindings: media: venus: Convert sdm845 to DT schema Convert qcom,sdm845-venus Venus binding to DT schema. Reviewed-by: Rob Herring Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab --- .../bindings/media/qcom,sdm845-venus.yaml | 156 +++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 Documentation/devicetree/bindings/media/qcom,sdm845-venus.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/qcom,sdm845-venus.yaml b/Documentation/devicetree/bindings/media/qcom,sdm845-venus.yaml new file mode 100644 index 000000000000..05cabe4e893a --- /dev/null +++ b/Documentation/devicetree/bindings/media/qcom,sdm845-venus.yaml @@ -0,0 +1,156 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/media/qcom,sdm845-venus.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Qualcomm Venus video encode and decode accelerators + +maintainers: + - Stanimir Varbanov + +description: | + The Venus IP is a video encode and decode accelerator present + on Qualcomm platforms + +properties: + compatible: + const: qcom,sdm845-venus + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + power-domains: + maxItems: 1 + + clocks: + maxItems: 3 + + clock-names: + items: + - const: core + - const: iface + - const: bus + + iommus: + maxItems: 2 + + memory-region: + maxItems: 1 + + video-core0: + type: object + + properties: + compatible: + const: venus-decoder + + clocks: + maxItems: 2 + + clock-names: + items: + - const: core + - const: bus + + power-domains: + maxItems: 1 + + required: + - compatible + - clocks + - clock-names + - power-domains + + additionalProperties: false + + video-core1: + type: object + + properties: + compatible: + const: venus-encoder + + clocks: + maxItems: 2 + + clock-names: + items: + - const: core + - const: bus + + power-domains: + maxItems: 1 + + required: + - compatible + - clocks + - clock-names + - power-domains + + additionalProperties: false + + video-firmware: + type: object + + description: | + Firmware subnode is needed when the platform does not + have TrustZone. + + properties: + iommus: + maxItems: 1 + + required: + - iommus + +required: + - compatible + - reg + - interrupts + - power-domains + - clocks + - clock-names + - iommus + - memory-region + - video-core0 + - video-core1 + +examples: + - | + #include + #include + + video-codec@aa00000 { + compatible = "qcom,sdm845-venus"; + reg = <0 0x0aa00000 0 0xff000>; + interrupts = ; + clocks = <&videocc VIDEO_CC_VENUS_CTL_CORE_CLK>, + <&videocc VIDEO_CC_VENUS_AHB_CLK>, + <&videocc VIDEO_CC_VENUS_CTL_AXI_CLK>; + clock-names = "core", "iface", "bus"; + power-domains = <&videocc VENUS_GDSC>; + iommus = <&apps_smmu 0x10a0 0x8>, + <&apps_smmu 0x10b0 0x0>; + memory-region = <&venus_mem>; + + video-core0 { + compatible = "venus-decoder"; + clocks = <&videocc VIDEO_CC_VCODEC0_CORE_CLK>, + <&videocc VIDEO_CC_VCODEC0_AXI_CLK>; + clock-names = "core", "bus"; + power-domains = <&videocc VCODEC0_GDSC>; + }; + + video-core1 { + compatible = "venus-encoder"; + clocks = <&videocc VIDEO_CC_VCODEC1_CORE_CLK>, + <&videocc VIDEO_CC_VCODEC1_AXI_CLK>; + clock-names = "core", "bus"; + power-domains = <&videocc VCODEC1_GDSC>; + }; + }; -- cgit v1.2.3 From fde57f90a9838cf86e337cf3cd0e3a108ff222d5 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Tue, 17 Dec 2019 14:26:18 +0100 Subject: media: dt-bindings: media: venus: Add sdm845v2 DT schema Add new qcom,sdm845-venus-v2 DT binding schema. Reviewed-by: Rob Herring Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab --- .../bindings/media/qcom,sdm845-venus-v2.yaml | 140 +++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml b/Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml new file mode 100644 index 000000000000..8552f4ab907e --- /dev/null +++ b/Documentation/devicetree/bindings/media/qcom,sdm845-venus-v2.yaml @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/media/qcom,sdm845-venus-v2.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Qualcomm Venus video encode and decode accelerators + +maintainers: + - Stanimir Varbanov + +description: | + The Venus IP is a video encode and decode accelerator present + on Qualcomm platforms + +properties: + compatible: + const: qcom,sdm845-venus-v2 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + power-domains: + maxItems: 3 + + power-domain-names: + items: + - const: venus + - const: vcodec0 + - const: vcodec1 + + clocks: + maxItems: 7 + + clock-names: + items: + - const: core + - const: iface + - const: bus + - const: vcodec0_core + - const: vcodec0_bus + - const: vcodec1_core + - const: vcodec1_bus + + iommus: + maxItems: 2 + + memory-region: + maxItems: 1 + + video-core0: + type: object + + properties: + compatible: + const: venus-decoder + + required: + - compatible + + additionalProperties: false + + video-core1: + type: object + + properties: + compatible: + const: venus-encoder + + required: + - compatible + + additionalProperties: false + + video-firmware: + type: object + + description: | + Firmware subnode is needed when the platform does not + have TrustZone. + + properties: + iommus: + maxItems: 1 + + required: + - iommus + +required: + - compatible + - reg + - interrupts + - power-domains + - power-domain-names + - clocks + - clock-names + - iommus + - memory-region + - video-core0 + - video-core1 + +examples: + - | + #include + #include + + video-codec@aa00000 { + compatible = "qcom,sdm845-venus-v2"; + reg = <0 0x0aa00000 0 0xff000>; + interrupts = ; + clocks = <&videocc VIDEO_CC_VENUS_CTL_CORE_CLK>, + <&videocc VIDEO_CC_VENUS_AHB_CLK>, + <&videocc VIDEO_CC_VENUS_CTL_AXI_CLK>, + <&videocc VIDEO_CC_VCODEC0_CORE_CLK>, + <&videocc VIDEO_CC_VCODEC0_AXI_CLK>, + <&videocc VIDEO_CC_VCODEC1_CORE_CLK>, + <&videocc VIDEO_CC_VCODEC1_AXI_CLK>; + clock-names = "core", "iface", "bus", + "vcodec0_core", "vcodec0_bus", + "vcodec1_core", "vcodec1_bus"; + power-domains = <&videocc VENUS_GDSC>, + <&videocc VCODEC0_GDSC>, + <&videocc VCODEC1_GDSC>; + power-domain-names = "venus", "vcodec0", "vcodec1"; + iommus = <&apps_smmu 0x10a0 0x8>, + <&apps_smmu 0x10b0 0x0>; + memory-region = <&venus_mem>; + + video-core0 { + compatible = "venus-decoder"; + }; + + video-core1 { + compatible = "venus-encoder"; + }; + }; -- cgit v1.2.3 From 0e1558deedad68cf3aae0e94ec50ece10c60ed9c Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Wed, 18 Dec 2019 14:04:54 +0100 Subject: media: dt-bindings: media: venus: delete old binding document After transitioning to YAML DT schema we don't need this old-style document. Acked-by: Rob Herring Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab --- .../devicetree/bindings/media/qcom,venus.txt | 120 --------------------- 1 file changed, 120 deletions(-) delete mode 100644 Documentation/devicetree/bindings/media/qcom,venus.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/qcom,venus.txt b/Documentation/devicetree/bindings/media/qcom,venus.txt deleted file mode 100644 index b602c4c025e7..000000000000 --- a/Documentation/devicetree/bindings/media/qcom,venus.txt +++ /dev/null @@ -1,120 +0,0 @@ -* Qualcomm Venus video encoder/decoder accelerators - -- compatible: - Usage: required - Value type: - Definition: Value should contain one of: - - "qcom,msm8916-venus" - - "qcom,msm8996-venus" - - "qcom,sdm845-venus" -- reg: - Usage: required - Value type: - Definition: Register base address and length of the register map. -- interrupts: - Usage: required - Value type: - Definition: Should contain interrupt line number. -- clocks: - Usage: required - Value type: - Definition: A List of phandle and clock specifier pairs as listed - in clock-names property. -- clock-names: - Usage: required for msm8916 - Value type: - Definition: Should contain the following entries: - - "core" Core video accelerator clock - - "iface" Video accelerator AHB clock - - "bus" Video accelerator AXI clock -- clock-names: - Usage: required for msm8996 - Value type: - Definition: Should contain the following entries: - - "core" Core video accelerator clock - - "iface" Video accelerator AHB clock - - "bus" Video accelerator AXI clock - - "mbus" Video MAXI clock -- power-domains: - Usage: required - Value type: - Definition: A phandle and power domain specifier pairs to the - power domain which is responsible for collapsing - and restoring power to the peripheral. -- iommus: - Usage: required - Value type: - Definition: A list of phandle and IOMMU specifier pairs. -- memory-region: - Usage: required - Value type: - Definition: reference to the reserved-memory for the firmware - memory region. - -* Subnodes -The Venus video-codec node must contain two subnodes representing -video-decoder and video-encoder, and one optional firmware subnode. -Firmware subnode is needed when the platform does not have TrustZone. - -Every of video-encoder or video-decoder subnode should have: - -- compatible: - Usage: required - Value type: - Definition: Value should contain "venus-decoder" or "venus-encoder" -- clocks: - Usage: required for msm8996 - Value type: - Definition: A List of phandle and clock specifier pairs as listed - in clock-names property. -- clock-names: - Usage: required for msm8996 - Value type: - Definition: Should contain the following entries: - - "core" Subcore video accelerator clock - -- power-domains: - Usage: required for msm8996 - Value type: - Definition: A phandle and power domain specifier pairs to the - power domain which is responsible for collapsing - and restoring power to the subcore. - -The firmware subnode must have: - -- iommus: - Usage: required - Value type: - Definition: A list of phandle and IOMMU specifier pairs. - -* An Example - video-codec@1d00000 { - compatible = "qcom,msm8916-venus"; - reg = <0x01d00000 0xff000>; - interrupts = ; - clocks = <&gcc GCC_VENUS0_VCODEC0_CLK>, - <&gcc GCC_VENUS0_AHB_CLK>, - <&gcc GCC_VENUS0_AXI_CLK>; - clock-names = "core", "iface", "bus"; - power-domains = <&gcc VENUS_GDSC>; - iommus = <&apps_iommu 5>; - memory-region = <&venus_mem>; - - video-decoder { - compatible = "venus-decoder"; - clocks = <&mmcc VIDEO_SUBCORE0_CLK>; - clock-names = "core"; - power-domains = <&mmcc VENUS_CORE0_GDSC>; - }; - - video-encoder { - compatible = "venus-encoder"; - clocks = <&mmcc VIDEO_SUBCORE1_CLK>; - clock-names = "core"; - power-domains = <&mmcc VENUS_CORE1_GDSC>; - }; - - video-firmware { - iommus = <&apps_iommu 0x10b2 0x0>; - }; - }; -- cgit v1.2.3 From 545d984cc90c5843ffa61afb1d0e64302368c411 Mon Sep 17 00:00:00 2001 From: Dikshita Agarwal Date: Tue, 14 Jan 2020 13:53:34 +0100 Subject: media: dt-bindings: media: venus: Add sc7180 DT schema Add new qcom,sc7180-venus DT binding schema. Reviewed-by: Rob Herring Signed-off-by: Dikshita Agarwal Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab --- .../bindings/media/qcom,sc7180-venus.yaml | 140 +++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 Documentation/devicetree/bindings/media/qcom,sc7180-venus.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/qcom,sc7180-venus.yaml b/Documentation/devicetree/bindings/media/qcom,sc7180-venus.yaml new file mode 100644 index 000000000000..764affa4877e --- /dev/null +++ b/Documentation/devicetree/bindings/media/qcom,sc7180-venus.yaml @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/media/qcom,sc7180-venus.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Qualcomm Venus video encode and decode accelerators + +maintainers: + - Stanimir Varbanov + +description: | + The Venus IP is a video encode and decode accelerator present + on Qualcomm platforms + +properties: + compatible: + const: qcom,sc7180-venus + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + power-domains: + maxItems: 2 + + power-domain-names: + items: + - const: venus + - const: vcodec0 + + clocks: + maxItems: 5 + + clock-names: + items: + - const: core + - const: iface + - const: bus + - const: vcodec0_core + - const: vcodec0_bus + + iommus: + maxItems: 1 + + memory-region: + maxItems: 1 + + interconnects: + maxItems: 2 + + interconnect-names: + items: + - const: video-mem + - const: cpu-cfg + + video-decoder: + type: object + + properties: + compatible: + const: venus-decoder + + required: + - compatible + + additionalProperties: false + + video-encoder: + type: object + + properties: + compatible: + const: venus-encoder + + required: + - compatible + + additionalProperties: false + + video-firmware: + type: object + + description: | + Firmware subnode is needed when the platform does not + have TrustZone. + + properties: + iommus: + maxItems: 1 + + required: + - iommus + +required: + - compatible + - reg + - interrupts + - power-domains + - power-domain-names + - clocks + - clock-names + - iommus + - memory-region + - video-decoder + - video-encoder + +examples: + - | + #include + #include + + venus: video-codec@aa00000 { + compatible = "qcom,sc7180-venus"; + reg = <0 0x0aa00000 0 0xff000>; + interrupts = ; + power-domains = <&videocc VENUS_GDSC>, + <&videocc VCODEC0_GDSC>; + power-domain-names = "venus", "vcodec0"; + clocks = <&videocc VIDEO_CC_VENUS_CTL_CORE_CLK>, + <&videocc VIDEO_CC_VENUS_AHB_CLK>, + <&videocc VIDEO_CC_VENUS_CTL_AXI_CLK>, + <&videocc VIDEO_CC_VCODEC0_CORE_CLK>, + <&videocc VIDEO_CC_VCODEC0_AXI_CLK>; + clock-names = "core", "iface", "bus", + "vcodec0_core", "vcodec0_bus"; + iommus = <&apps_smmu 0x0c00 0x60>; + memory-region = <&venus_mem>; + + video-decoder { + compatible = "venus-decoder"; + }; + + video-encoder { + compatible = "venus-encoder"; + }; + }; -- cgit v1.2.3 From b749ebe77d6c251d3a049d43f7e43781409c969c Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Thu, 12 Dec 2019 21:07:51 -0600 Subject: dt-bindings: arm: cpu: Add TI AM335x and AM437x enable method Add enable-method binding for "ti,am3352" and "ti,am4372". Acked-by: Santosh Shilimkar Signed-off-by: Dave Gerlach Acked-by: Rob Herring Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren --- Documentation/devicetree/bindings/arm/cpus.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml index 7a9c3ce2dbef..57fd46d8dfe1 100644 --- a/Documentation/devicetree/bindings/arm/cpus.yaml +++ b/Documentation/devicetree/bindings/arm/cpus.yaml @@ -201,6 +201,8 @@ properties: - rockchip,rk3066-smp - socionext,milbeaut-m10v-smp - ste,dbx500-smp + - ti,am3352 + - ti,am4372 cpu-release-addr: $ref: '/schemas/types.yaml#/definitions/uint64' -- cgit v1.2.3 From ecdc5d842bb3c166c3d549e52ba91a3955b257f2 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 23 Oct 2019 13:56:36 +0200 Subject: s390/protvirt: introduce host side setup Add "prot_virt" command line option which controls if the kernel protected VMs support is enabled at early boot time. This has to be done early, because it needs large amounts of memory and will disable some features like STP time sync for the lpar. Extend ultravisor info definitions and expose it via uv_info struct filled in during startup. Signed-off-by: Vasily Gorbik Reviewed-by: Thomas Huth Acked-by: David Hildenbrand Reviewed-by: Cornelia Huck Acked-by: Christian Borntraeger [borntraeger@de.ibm.com: patch merging, splitting, fixing] Signed-off-by: Christian Borntraeger --- Documentation/admin-guide/kernel-parameters.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dbc22d684627..b0beae9b9e36 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3795,6 +3795,11 @@ before loading. See Documentation/admin-guide/blockdev/ramdisk.rst. + prot_virt= [S390] enable hosting protected virtual machines + isolated from the hypervisor (if hardware supports + that). + Format: + psi= [KNL] Enable or disable pressure stall information tracking. Format: -- cgit v1.2.3 From f65470661f3648fe6d3d13475d01a744bb14f8b4 Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Tue, 4 Feb 2020 08:51:58 -0500 Subject: KVM: s390/interrupt: do not pin adapter interrupt pages The adapter interrupt page containing the indicator bits is currently pinned. That means that a guest with many devices can pin a lot of memory pages in the host. This also complicates the reference tracking which is needed for memory management handling of protected virtual machines. It might also have some strange side effects for madvise MADV_DONTNEED and other things. We can simply try to get the userspace page set the bits and free the page. By storing the userspace address in the irq routing entry instead of the guest address we can actually avoid many lookups and list walks so that this variant is very likely not slower. If userspace messes around with the memory slots the worst thing that can happen is that we write to some other memory within that process. As we get the the page with FOLL_WRITE this can also not be used to write to shared read-only pages. Signed-off-by: Ulrich Weigand Acked-by: David Hildenbrand Reviewed-by: Cornelia Huck [borntraeger@de.ibm.com: patch simplification] Signed-off-by: Christian Borntraeger --- Documentation/virt/kvm/devices/s390_flic.rst | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virt/kvm/devices/s390_flic.rst b/Documentation/virt/kvm/devices/s390_flic.rst index 954190da7d04..ea96559ba501 100644 --- a/Documentation/virt/kvm/devices/s390_flic.rst +++ b/Documentation/virt/kvm/devices/s390_flic.rst @@ -108,16 +108,9 @@ Groups: mask or unmask the adapter, as specified in mask KVM_S390_IO_ADAPTER_MAP - perform a gmap translation for the guest address provided in addr, - pin a userspace page for the translated address and add it to the - list of mappings - - .. note:: A new mapping will be created unconditionally; therefore, - the calling code should avoid making duplicate mappings. - + This is now a no-op. The mapping is purely done by the irq route. KVM_S390_IO_ADAPTER_UNMAP - release a userspace page for the translated address specified in addr - from the list of mappings + This is now a no-op. The mapping is purely done by the irq route. KVM_DEV_FLIC_AISM modify the adapter-interruption-suppression mode for a given isc if the -- cgit v1.2.3 From 68cf7b1f137e61cea71925e48bc0c6d7bcfc637c Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 14 Jun 2019 13:11:21 +0200 Subject: KVM: s390: protvirt: disallow one_reg A lot of the registers are controlled by the Ultravisor and never visible to KVM. Some fields in the sie control block are overlayed, like gbea. As no known userspace uses the ONE_REG interface on s390 if sync regs are available, no functionality is lost if it is disabled for protected guests. Signed-off-by: Janosch Frank Reviewed-by: Thomas Huth Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand [borntraeger@de.ibm.com: patch merging, splitting, fixing] Signed-off-by: Christian Borntraeger --- Documentation/virt/kvm/api.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 97a72a53fa4b..7505d7a6c0d8 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -2117,7 +2117,8 @@ Errors: ====== ============================================================  ENOENT   no such register -  EINVAL   invalid register ID, or no such register +  EINVAL   invalid register ID, or no such register or used with VMs in + protected virtualization mode on s390  EPERM    (arm64) register access not allowed before vcpu finalization ====== ============================================================ @@ -2552,7 +2553,8 @@ Errors include: ======== ============================================================  ENOENT   no such register -  EINVAL   invalid register ID, or no such register +  EINVAL   invalid register ID, or no such register or used with VMs in + protected virtualization mode on s390  EPERM    (arm64) register access not allowed before vcpu finalization ======== ============================================================ -- cgit v1.2.3 From a421027987ed794d0e54cc7820e685ad276a502d Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 28 Feb 2019 12:39:42 +0100 Subject: DOCUMENTATION: Protected virtual machine introduction and IPL Add documentation about protected KVM guests and description of changes that are necessary to move a KVM VM into Protected Virtualization mode. Signed-off-by: Janosch Frank Reviewed-by: Cornelia Huck [borntraeger@de.ibm.com: fixing and conversion to rst] Signed-off-by: Christian Borntraeger --- Documentation/virt/kvm/index.rst | 2 + Documentation/virt/kvm/s390-pv-boot.rst | 84 +++++++++++++++++++++++ Documentation/virt/kvm/s390-pv.rst | 116 ++++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 Documentation/virt/kvm/s390-pv-boot.rst create mode 100644 Documentation/virt/kvm/s390-pv.rst (limited to 'Documentation') diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst index 774deaebf7fa..dcc252634cf9 100644 --- a/Documentation/virt/kvm/index.rst +++ b/Documentation/virt/kvm/index.rst @@ -18,6 +18,8 @@ KVM nested-vmx ppc-pv s390-diag + s390-pv + s390-pv-boot timekeeping vcpu-requests diff --git a/Documentation/virt/kvm/s390-pv-boot.rst b/Documentation/virt/kvm/s390-pv-boot.rst new file mode 100644 index 000000000000..8b8fa0390409 --- /dev/null +++ b/Documentation/virt/kvm/s390-pv-boot.rst @@ -0,0 +1,84 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====================================== +s390 (IBM Z) Boot/IPL of Protected VMs +====================================== + +Summary +------- +The memory of Protected Virtual Machines (PVMs) is not accessible to +I/O or the hypervisor. In those cases where the hypervisor needs to +access the memory of a PVM, that memory must be made accessible. +Memory made accessible to the hypervisor will be encrypted. See +:doc:`s390-pv` for details." + +On IPL (boot) a small plaintext bootloader is started, which provides +information about the encrypted components and necessary metadata to +KVM to decrypt the protected virtual machine. + +Based on this data, KVM will make the protected virtual machine known +to the Ultravisor (UV) and instruct it to secure the memory of the +PVM, decrypt the components and verify the data and address list +hashes, to ensure integrity. Afterwards KVM can run the PVM via the +SIE instruction which the UV will intercept and execute on KVM's +behalf. + +As the guest image is just like an opaque kernel image that does the +switch into PV mode itself, the user can load encrypted guest +executables and data via every available method (network, dasd, scsi, +direct kernel, ...) without the need to change the boot process. + + +Diag308 +------- +This diagnose instruction is the basic mechanism to handle IPL and +related operations for virtual machines. The VM can set and retrieve +IPL information blocks, that specify the IPL method/devices and +request VM memory and subsystem resets, as well as IPLs. + +For PVMs this concept has been extended with new subcodes: + +Subcode 8: Set an IPL Information Block of type 5 (information block +for PVMs) +Subcode 9: Store the saved block in guest memory +Subcode 10: Move into Protected Virtualization mode + +The new PV load-device-specific-parameters field specifies all data +that is necessary to move into PV mode. + +* PV Header origin +* PV Header length +* List of Components composed of + * AES-XTS Tweak prefix + * Origin + * Size + +The PV header contains the keys and hashes, which the UV will use to +decrypt and verify the PV, as well as control flags and a start PSW. + +The components are for instance an encrypted kernel, kernel parameters +and initrd. The components are decrypted by the UV. + +After the initial import of the encrypted data, all defined pages will +contain the guest content. All non-specified pages will start out as +zero pages on first access. + + +When running in protected virtualization mode, some subcodes will result in +exceptions or return error codes. + +Subcodes 4 and 7, which specify operations that do not clear the guest +memory, will result in specification exceptions. This is because the +UV will clear all memory when a secure VM is removed, and therefore +non-clearing IPL subcodes are not allowed. + +Subcodes 8, 9, 10 will result in specification exceptions. +Re-IPL into a protected mode is only possible via a detour into non +protected mode. + +Keys +---- +Every CEC will have a unique public key to enable tooling to build +encrypted images. +See `s390-tools `_ +for the tooling. diff --git a/Documentation/virt/kvm/s390-pv.rst b/Documentation/virt/kvm/s390-pv.rst new file mode 100644 index 000000000000..774a8c606091 --- /dev/null +++ b/Documentation/virt/kvm/s390-pv.rst @@ -0,0 +1,116 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================================= +s390 (IBM Z) Ultravisor and Protected VMs +========================================= + +Summary +------- +Protected virtual machines (PVM) are KVM VMs that do not allow KVM to +access VM state like guest memory or guest registers. Instead, the +PVMs are mostly managed by a new entity called Ultravisor (UV). The UV +provides an API that can be used by PVMs and KVM to request management +actions. + +Each guest starts in non-protected mode and then may make a request to +transition into protected mode. On transition, KVM registers the guest +and its VCPUs with the Ultravisor and prepares everything for running +it. + +The Ultravisor will secure and decrypt the guest's boot memory +(i.e. kernel/initrd). It will safeguard state changes like VCPU +starts/stops and injected interrupts while the guest is running. + +As access to the guest's state, such as the SIE state description, is +normally needed to be able to run a VM, some changes have been made in +the behavior of the SIE instruction. A new format 4 state description +has been introduced, where some fields have different meanings for a +PVM. SIE exits are minimized as much as possible to improve speed and +reduce exposed guest state. + + +Interrupt injection +------------------- +Interrupt injection is safeguarded by the Ultravisor. As KVM doesn't +have access to the VCPUs' lowcores, injection is handled via the +format 4 state description. + +Machine check, external, IO and restart interruptions each can be +injected on SIE entry via a bit in the interrupt injection control +field (offset 0x54). If the guest cpu is not enabled for the interrupt +at the time of injection, a validity interception is recognized. The +format 4 state description contains fields in the interception data +block where data associated with the interrupt can be transported. + +Program and Service Call exceptions have another layer of +safeguarding; they can only be injected for instructions that have +been intercepted into KVM. The exceptions need to be a valid outcome +of an instruction emulation by KVM, e.g. we can never inject a +addressing exception as they are reported by SIE since KVM has no +access to the guest memory. + + +Mask notification interceptions +------------------------------- +KVM cannot intercept lctl(g) and lpsw(e) anymore in order to be +notified when a PVM enables a certain class of interrupt. As a +replacement, two new interception codes have been introduced: One +indicating that the contents of CRs 0, 6, or 14 have been changed, +indicating different interruption subclasses; and one indicating that +PSW bit 13 has been changed, indicating that a machine check +intervention was requested and those are now enabled. + +Instruction emulation +--------------------- +With the format 4 state description for PVMs, the SIE instruction already +interprets more instructions than it does with format 2. It is not able +to interpret every instruction, but needs to hand some tasks to KVM; +therefore, the SIE and the ultravisor safeguard emulation inputs and outputs. + +The control structures associated with SIE provide the Secure +Instruction Data Area (SIDA), the Interception Parameters (IP) and the +Secure Interception General Register Save Area. Guest GRs and most of +the instruction data, such as I/O data structures, are filtered. +Instruction data is copied to and from the SIDA when needed. Guest +GRs are put into / retrieved from the Secure Interception General +Register Save Area. + +Only GR values needed to emulate an instruction will be copied into this +save area and the real register numbers will be hidden. + +The Interception Parameters state description field still contains the +the bytes of the instruction text, but with pre-set register values +instead of the actual ones. I.e. each instruction always uses the same +instruction text, in order not to leak guest instruction text. +This also implies that the register content that a guest had in r +may be in r from the hypervisor's point of view. + +The Secure Instruction Data Area contains instruction storage +data. Instruction data, i.e. data being referenced by an instruction +like the SCCB for sclp, is moved via the SIDA. When an instruction is +intercepted, the SIE will only allow data and program interrupts for +this instruction to be moved to the guest via the two data areas +discussed before. Other data is either ignored or results in validity +interceptions. + + +Instruction emulation interceptions +----------------------------------- +There are two types of SIE secure instruction intercepts: the normal +and the notification type. Normal secure instruction intercepts will +make the guest pending for instruction completion of the intercepted +instruction type, i.e. on SIE entry it is attempted to complete +emulation of the instruction with the data provided by KVM. That might +be a program exception or instruction completion. + +The notification type intercepts inform KVM about guest environment +changes due to guest instruction interpretation. Such an interception +is recognized, for example, for the store prefix instruction to provide +the new lowcore location. On SIE reentry, any KVM data in the data areas +is ignored and execution continues as if the guest instruction had +completed. For that reason KVM is not allowed to inject a program +interrupt. + +Links +----- +`KVM Forum 2019 presentation `_ -- cgit v1.2.3 From 04ed89dc4aeba57ab99df16edbd9d06e43d0a2c4 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 8 Nov 2019 05:05:26 -0500 Subject: KVM: s390: protvirt: Add KVM api documentation Add documentation for KVM_CAP_S390_PROTECTED capability and the KVM_S390_PV_COMMAND ioctl. Signed-off-by: Janosch Frank Reviewed-by: Cornelia Huck [borntraeger@de.ibm.com: patch merging, splitting, fixing] Signed-off-by: Christian Borntraeger --- Documentation/virt/kvm/api.rst | 59 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'Documentation') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 7505d7a6c0d8..bae90f3cd11d 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4648,6 +4648,54 @@ the clear cpu reset definition in the POP. However, the cpu is not put into ESA mode. This reset is a superset of the initial reset. +4.125 KVM_S390_PV_COMMAND +------------------------- + +:Capability: KVM_CAP_S390_PROTECTED +:Architectures: s390 +:Type: vm ioctl +:Parameters: struct kvm_pv_cmd +:Returns: 0 on success, < 0 on error + +:: + + struct kvm_pv_cmd { + __u32 cmd; /* Command to be executed */ + __u16 rc; /* Ultravisor return code */ + __u16 rrc; /* Ultravisor return reason code */ + __u64 data; /* Data or address */ + __u32 flags; /* flags for future extensions. Must be 0 for now */ + __u32 reserved[3]; + }; + +cmd values: + +KVM_PV_ENABLE + Allocate memory and register the VM with the Ultravisor, thereby + donating memory to the Ultravisor that will become inaccessible to + KVM. All existing CPUs are converted to protected ones. After this + command has succeeded, any CPU added via hotplug will become + protected during its creation as well. + +KVM_PV_DISABLE + + Deregister the VM from the Ultravisor and reclaim the memory that + had been donated to the Ultravisor, making it usable by the kernel + again. All registered VCPUs are converted back to non-protected + ones. + +KVM_PV_VM_SET_SEC_PARMS + Pass the image header from VM memory to the Ultravisor in + preparation of image unpacking and verification. + +KVM_PV_VM_UNPACK + Unpack (protect and decrypt) a page of the encrypted boot image. + +KVM_PV_VM_VERIFY + Verify the integrity of the unpacked image. Only if this succeeds, + KVM is allowed to start protected VCPUs. + + 5. The kvm_run structure ======================== @@ -6026,3 +6074,14 @@ Architectures: s390 This capability indicates that the KVM_S390_NORMAL_RESET and KVM_S390_CLEAR_RESET ioctls are available. + +8.23 KVM_CAP_S390_PROTECTED + +Architecture: s390 + + +This capability indicates that the Ultravisor has been initialized and +KVM can therefore start protected VMs. +This capability governs the KVM_S390_PV_COMMAND ioctl and the +KVM_MP_STATE_LOAD MP_STATE. KVM_SET_MP_STATE can fail for protected +guests when the state change is invalid. -- cgit v1.2.3 From 91a208f2185ad4855ff03c342d0b7e4f5fc6f5df Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 26 Feb 2020 10:23:41 +0000 Subject: net: phylink: propagate resolved link config via mac_link_up() Propagate the resolved link parameters via the mac_link_up() call for MACs that do not automatically track their PCS state. We propagate the link parameters via function arguments so that inappropriate members of struct phylink_link_state can't be accessed, and creating a new structure just for this adds needless complexity to the API. Tested-by: Andre Przywara Tested-by: Alexandre Belloni Tested-by: Vladimir Oltean Signed-off-by: Russell King Signed-off-by: David S. Miller --- Documentation/networking/sfp-phylink.rst | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/sfp-phylink.rst b/Documentation/networking/sfp-phylink.rst index d753a309f9d1..8d7af28cd835 100644 --- a/Documentation/networking/sfp-phylink.rst +++ b/Documentation/networking/sfp-phylink.rst @@ -74,10 +74,13 @@ phylib to the sfp/phylink support. Please send patches to improve this documentation. 1. Optionally split the network driver's phylib update function into - three parts dealing with link-down, link-up and reconfiguring the - MAC settings. This can be done as a separate preparation commit. + two parts dealing with link-down and link-up. This can be done as + a separate preparation commit. - An example of this preparation can be found in git commit fc548b991fb0. + An older example of this preparation can be found in git commit + fc548b991fb0, although this was splitting into three parts; the + link-up part now includes configuring the MAC for the link settings. + Please see :c:func:`mac_link_up` for more information on this. 2. Replace:: @@ -207,6 +210,14 @@ this documentation. using. This is particularly important for in-band negotiation methods such as 1000base-X and SGMII. + The :c:func:`mac_link_up` method is used to inform the MAC that the + link has come up. The call includes the negotiation mode and interface + for reference only. The finalised link parameters are also supplied + (speed, duplex and flow control/pause enablement settings) which + should be used to configure the MAC when the MAC and PCS are not + tightly integrated, or when the settings are not coming from in-band + negotiation. + The :c:func:`mac_config` method is used to update the MAC with the requested state, and must avoid unnecessarily taking the link down when making changes to the MAC configuration. This means the -- cgit v1.2.3 From a52dede4761885fe292d46e7b66f2755bd5b5430 Mon Sep 17 00:00:00 2001 From: Bingbu Cao Date: Mon, 20 Jan 2020 04:22:21 -0300 Subject: media: ipu3.rst: add imgu pipe config tool link A specific tool which can be used to generate imgu intermedia resolutions now is public on github, this patch adds this information into ipu3.rst. Signed-off-by: Bingbu Cao Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/v4l-drivers/ipu3.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/media/v4l-drivers/ipu3.rst b/Documentation/media/v4l-drivers/ipu3.rst index e4904ab44e60..a694f49491f9 100644 --- a/Documentation/media/v4l-drivers/ipu3.rst +++ b/Documentation/media/v4l-drivers/ipu3.rst @@ -311,10 +311,13 @@ Down Scaler and GDC blocks should be configured with the supported resolutions as each hardware block has its own alignment requirement. You must configure the output resolution of the hardware blocks smartly to meet -the hardware requirement along with keeping the maximum field of view. -The intermediate resolutions can be generated by specific tool and this -information can be obtained by looking at the following IPU3 ImgU configuration -table. +the hardware requirement along with keeping the maximum field of view. The +intermediate resolutions can be generated by specific tool - + +https://github.com/intel/intel-ipu3-pipecfg + +This tool can be used to generate intermediate resolutions. More information can +be obtained by looking at the following IPU3 ImgU configuration table. https://chromium.googlesource.com/chromiumos/overlays/board-overlays/+/master -- cgit v1.2.3 From 9d730f2cf4c0391785855dd231577d2de2594df9 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 20 Jan 2020 05:15:57 -0300 Subject: media: dt-bindings: media: i2c: Add IMX219 CMOS sensor binding Add YAML device tree binding for IMX219 CMOS image sensor, and the relevant MAINTAINERS entries. Signed-off-by: Andrey Konovalov Reviewed-by: Rob Herring Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- .../devicetree/bindings/media/i2c/imx219.yaml | 114 +++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 Documentation/devicetree/bindings/media/i2c/imx219.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/i2c/imx219.yaml b/Documentation/devicetree/bindings/media/i2c/imx219.yaml new file mode 100644 index 000000000000..32d6b693274f --- /dev/null +++ b/Documentation/devicetree/bindings/media/i2c/imx219.yaml @@ -0,0 +1,114 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/media/i2c/imx219.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sony 1/4.0-Inch 8Mpixel CMOS Digital Image Sensor + +maintainers: + - Dave Stevenson + +description: |- + The Sony imx219 is a 1/4.0-inch CMOS active pixel digital image sensor + with an active array size of 3280H x 2464V. It is programmable through + I2C interface. The I2C address is fixed to 0x10 as per sensor data sheet. + Image data is sent through MIPI CSI-2, which is configured as either 2 or + 4 data lanes. + +properties: + compatible: + const: sony,imx219 + + reg: + description: I2C device address + maxItems: 1 + + clocks: + maxItems: 1 + + VDIG-supply: + description: + Digital I/O voltage supply, 1.8 volts + + VANA-supply: + description: + Analog voltage supply, 2.8 volts + + VDDL-supply: + description: + Digital core voltage supply, 1.2 volts + + reset-gpios: + description: |- + Reference to the GPIO connected to the xclr pin, if any. + Must be released (set high) after all supplies are applied. + + # See ../video-interfaces.txt for more details + port: + type: object + properties: + endpoint: + type: object + properties: + data-lanes: + description: |- + The sensor supports either two-lane, or four-lane operation. + If this property is omitted four-lane operation is assumed. + For two-lane operation the property must be set to <1 2>. + items: + - const: 1 + - const: 2 + + clock-noncontinuous: + type: boolean + description: |- + MIPI CSI-2 clock is non-continuous if this property is present, + otherwise it's continuous. + + link-frequencies: + allOf: + - $ref: /schemas/types.yaml#/definitions/uint64-array + description: + Allowed data bus frequencies. + + required: + - link-frequencies + +required: + - compatible + - reg + - clocks + - VANA-supply + - VDIG-supply + - VDDL-supply + - port + +additionalProperties: false + +examples: + - | + i2c0 { + #address-cells = <1>; + #size-cells = <0>; + + imx219: sensor@10 { + compatible = "sony,imx219"; + reg = <0x10>; + clocks = <&imx219_clk>; + VANA-supply = <&imx219_vana>; /* 2.8v */ + VDIG-supply = <&imx219_vdig>; /* 1.8v */ + VDDL-supply = <&imx219_vddl>; /* 1.2v */ + + port { + imx219_0: endpoint { + remote-endpoint = <&csi1_ep>; + data-lanes = <1 2>; + clock-noncontinuous; + link-frequencies = /bits/ 64 <456000000>; + }; + }; + }; + }; + +... -- cgit v1.2.3 From 4c4f3f33838faa740f97aa4ab5ab471684919003 Mon Sep 17 00:00:00 2001 From: Kalyani Akula Date: Mon, 17 Feb 2020 15:56:42 +0530 Subject: dt-bindings: crypto: Add bindings for ZynqMP AES-GCM driver Add documentation to describe Xilinx ZynqMP AES-GCM driver bindings. Signed-off-by: Kalyani Akula Reviewed-by: Rob Herring Acked-by: Michal Simek Signed-off-by: Herbert Xu --- .../bindings/crypto/xlnx,zynqmp-aes.yaml | 37 ++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml b/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml new file mode 100644 index 000000000000..55dd6e3d270d --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/xlnx,zynqmp-aes.yaml @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/xlnx,zynqmp-aes.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Xilinx ZynqMP AES-GCM Hardware Accelerator Device Tree Bindings + +maintainers: + - Kalyani Akula + - Michal Simek + +description: | + The ZynqMP AES-GCM hardened cryptographic accelerator is used to + encrypt or decrypt the data with provided key and initialization vector. + +properties: + compatible: + const: xlnx,zynqmp-aes + +required: + - compatible + +additionalProperties: false + +examples: + - | + firmware { + zynqmp_firmware: zynqmp-firmware { + compatible = "xlnx,zynqmp-firmware"; + method = "smc"; + xlnx_aes: zynqmp-aes { + compatible = "xlnx,zynqmp-aes"; + }; + }; + }; +... -- cgit v1.2.3 From 87dac697a05a730d878f703a3c3dd78ac6c5bff4 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Fri, 27 Dec 2019 06:37:07 +0000 Subject: net/mlx5e: Add devlink fdb_large_groups parameter Add a devlink parameter to control the number of large groups in a autogrouped flow table. The default value is 15, and the range is between 1 and 1024. The size of each large group can be calculated according to the following formula: size = 4M / (fdb_large_groups + 1). Examples: - Set the number of large groups to 20. $ devlink dev param set pci/0000:82:00.0 name fdb_large_groups \ cmode driverinit value 20 Then run devlink reload command to apply the new value. $ devlink dev reload pci/0000:82:00.0 - Read the number of large groups in flow table. $ devlink dev param show pci/0000:82:00.0 name fdb_large_groups pci/0000:82:00.0: name fdb_large_groups type driver-specific values: cmode driverinit value 20 Signed-off-by: Jianbo Liu Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- Documentation/networking/devlink/mlx5.rst | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst index 629a6e69c036..4e4b97f7971a 100644 --- a/Documentation/networking/devlink/mlx5.rst +++ b/Documentation/networking/devlink/mlx5.rst @@ -37,6 +37,12 @@ parameters. * ``smfs`` Software managed flow steering. In SMFS mode, the HW steering entities are created and manage through the driver without firmware intervention. + * - ``fdb_large_groups`` + - u32 + - driverinit + - Control the number of large groups (size > 1) in the FDB table. + + * The default value is 15, and the range is between 1 and 1024. The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD`` -- cgit v1.2.3 From 7fd2944bdcb3f07eac29efcfdd28bc9ccfb26ce6 Mon Sep 17 00:00:00 2001 From: David Dai Date: Fri, 28 Feb 2020 12:11:40 +0200 Subject: dt-bindings: interconnect: Convert qcom,sdm845 to DT schema Convert the qcom,sdm845 interconnect provider binding to DT schema. Signed-off-by: David Dai Reviewed-by: Rob Herring Signed-off-by: Odelu Kukatla Signed-off-by: Sibi Sankar Link: https://lore.kernel.org/r/20200209183411.17195-2-sibis@codeaurora.org Signed-off-by: Georgi Djakov --- .../bindings/interconnect/qcom,sdm845.txt | 24 ------------ .../bindings/interconnect/qcom,sdm845.yaml | 43 ++++++++++++++++++++++ 2 files changed, 43 insertions(+), 24 deletions(-) delete mode 100644 Documentation/devicetree/bindings/interconnect/qcom,sdm845.txt create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,sdm845.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sdm845.txt b/Documentation/devicetree/bindings/interconnect/qcom,sdm845.txt deleted file mode 100644 index 5c4f1d911630..000000000000 --- a/Documentation/devicetree/bindings/interconnect/qcom,sdm845.txt +++ /dev/null @@ -1,24 +0,0 @@ -Qualcomm SDM845 Network-On-Chip interconnect driver binding ------------------------------------------------------------ - -SDM845 interconnect providers support system bandwidth requirements through -RPMh hardware accelerators known as Bus Clock Manager (BCM). The provider is -able to communicate with the BCM through the Resource State Coordinator (RSC) -associated with each execution environment. Provider nodes must reside within -an RPMh device node pertaining to their RSC and each provider maps to a single -RPMh resource. - -Required properties : -- compatible : shall contain only one of the following: - "qcom,sdm845-rsc-hlos" -- #interconnect-cells : should contain 1 - -Examples: - -apps_rsc: rsc { - rsc_hlos: interconnect { - compatible = "qcom,sdm845-rsc-hlos"; - #interconnect-cells = <1>; - }; -}; - diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sdm845.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sdm845.yaml new file mode 100644 index 000000000000..11a495dbfc52 --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,sdm845.yaml @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,sdm845.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SDM845 Network-On-Chip Interconnect + +maintainers: + - Georgi Djakov + +description: | + SDM845 interconnect providers support system bandwidth requirements through + RPMh hardware accelerators known as Bus Clock Manager (BCM). The provider is + able to communicate with the BCM through the Resource State Coordinator (RSC) + associated with each execution environment. Provider nodes must reside within + an RPMh device node pertaining to their RSC and each provider maps to a + single RPMh resource. + +properties: + compatible: + enum: + - qcom,sdm845-rsc-hlos + + '#interconnect-cells': + const: 1 + +required: + - compatible + - '#interconnect-cells' + +additionalProperties: false + +examples: + - | + #include + + apps_rsc: rsc { + rsc_hlos: interconnect { + compatible = "qcom,sdm845-rsc-hlos"; + #interconnect-cells = <1>; + }; + }; -- cgit v1.2.3 From c92cf0b40a7dc53aa3eaac3cdd0706a3892148ce Mon Sep 17 00:00:00 2001 From: David Dai Date: Fri, 28 Feb 2020 12:11:40 +0200 Subject: dt-bindings: interconnect: Add YAML schemas for QCOM bcm-voter Add YAML schemas for interconnect bcm-voters found on QCOM RPMh-based SoCs. Signed-off-by: David Dai Signed-off-by: Odelu Kukatla Signed-off-by: Sibi Sankar Acked-by: Rob Herring Link: https://lore.kernel.org/r/20200209183411.17195-3-sibis@codeaurora.org Signed-off-by: Georgi Djakov --- .../bindings/interconnect/qcom,bcm-voter.yaml | 45 ++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,bcm-voter.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/interconnect/qcom,bcm-voter.yaml b/Documentation/devicetree/bindings/interconnect/qcom,bcm-voter.yaml new file mode 100644 index 000000000000..5971fc1df08d --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,bcm-voter.yaml @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,bcm-voter.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm BCM-Voter Interconnect + +maintainers: + - Georgi Djakov + +description: | + The Bus Clock Manager (BCM) is a dedicated hardware accelerator that manages + shared system resources by aggregating requests from multiple Resource State + Coordinators (RSC). Interconnect providers are able to vote for aggregated + thresholds values from consumers by communicating through their respective + RSCs. + +properties: + compatible: + enum: + - qcom,bcm-voter + +required: + - compatible + +additionalProperties: false + +examples: + # Example 1: apps bcm_voter on SDM845 SoC should be defined inside &apps_rsc node + # as defined in Documentation/devicetree/bindings/soc/qcom/rpmh-rsc.txt + - | + + apps_bcm_voter: bcm_voter { + compatible = "qcom,bcm-voter"; + }; + + # Example 2: disp bcm_voter on SDM845 should be defined inside &disp_rsc node + # as defined in Documentation/devicetree/bindings/soc/qcom/rpmh-rsc.txt + - | + + disp_bcm_voter: bcm_voter { + compatible = "qcom,bcm-voter"; + }; +... -- cgit v1.2.3 From 6f690e16b5a08919feeced1dc4603aca01d7ed58 Mon Sep 17 00:00:00 2001 From: David Dai Date: Fri, 28 Feb 2020 12:11:40 +0200 Subject: dt-bindings: interconnect: Update Qualcomm SDM845 DT bindings Redefine the Network-on-Chip devices to more accurately describe the interconnect topology on Qualcomm's SDM845 platform. Each interconnect device can communicate with different instances of the RPMh hardware which are described as RSCs(Resource State Coordinators). Signed-off-by: David Dai Signed-off-by: Odelu Kukatla Signed-off-by: Sibi Sankar Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200209183411.17195-4-sibis@codeaurora.org Signed-off-by: Georgi Djakov --- .../bindings/interconnect/qcom,sdm845.yaml | 49 ++++++++++++++++++---- 1 file changed, 40 insertions(+), 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sdm845.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sdm845.yaml index 11a495dbfc52..8b087e0b0b81 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sdm845.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sdm845.yaml @@ -13,21 +13,44 @@ description: | SDM845 interconnect providers support system bandwidth requirements through RPMh hardware accelerators known as Bus Clock Manager (BCM). The provider is able to communicate with the BCM through the Resource State Coordinator (RSC) - associated with each execution environment. Provider nodes must reside within - an RPMh device node pertaining to their RSC and each provider maps to a - single RPMh resource. + associated with each execution environment. Provider nodes must point to at + least one RPMh device child node pertaining to their RSC and each provider + can map to multiple RPMh resources. properties: + reg: + maxItems: 1 + compatible: enum: - - qcom,sdm845-rsc-hlos + - qcom,sdm845-aggre1-noc + - qcom,sdm845-aggre2-noc + - qcom,sdm845-config-noc + - qcom,sdm845-dc-noc + - qcom,sdm845-gladiator-noc + - qcom,sdm845-mem-noc + - qcom,sdm845-mmss-noc + - qcom,sdm845-system-noc '#interconnect-cells': const: 1 + qcom,bcm-voters: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: | + List of phandles to qcom,bcm-voter nodes that are required by + this interconnect to send RPMh commands. + + qcom,bcm-voter-names: + $ref: /schemas/types.yaml#/definitions/string-array + description: | + Names for each of the qcom,bcm-voters specified. + required: - compatible + - reg - '#interconnect-cells' + - qcom,bcm-voters additionalProperties: false @@ -35,9 +58,17 @@ examples: - | #include - apps_rsc: rsc { - rsc_hlos: interconnect { - compatible = "qcom,sdm845-rsc-hlos"; - #interconnect-cells = <1>; - }; + mem_noc: interconnect@1380000 { + compatible = "qcom,sdm845-mem-noc"; + reg = <0 0x01380000 0 0x27200>; + #interconnect-cells = <1>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; + + mmss_noc: interconnect@1740000 { + compatible = "qcom,sdm845-mmss-noc"; + reg = <0 0x01740000 0 0x1c1000>; + #interconnect-cells = <1>; + qcom,bcm-voter-names = "apps", "disp"; + qcom,bcm-voters = <&apps_bcm_voter>, <&disp_bcm_voter>; }; -- cgit v1.2.3 From 107db7ec783820411801c469ed08f8f68b369e08 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 6 Feb 2020 16:17:22 +0100 Subject: docs: networking: convert 6lowpan.txt to ReST - add SPDX header; - use document title markup; - mark code blocks and literals as such; - adjust identation, whitespaces and blank lines; - add to networking/index.rst. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Stefan Schmidt Signed-off-by: Marcel Holtmann --- Documentation/networking/6lowpan.rst | 53 ++++++++++++++++++++++++++++++++++++ Documentation/networking/6lowpan.txt | 50 ---------------------------------- Documentation/networking/index.rst | 1 + 3 files changed, 54 insertions(+), 50 deletions(-) create mode 100644 Documentation/networking/6lowpan.rst delete mode 100644 Documentation/networking/6lowpan.txt (limited to 'Documentation') diff --git a/Documentation/networking/6lowpan.rst b/Documentation/networking/6lowpan.rst new file mode 100644 index 000000000000..e70a6520cc33 --- /dev/null +++ b/Documentation/networking/6lowpan.rst @@ -0,0 +1,53 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================== +Netdev private dataroom for 6lowpan interfaces +============================================== + +All 6lowpan able net devices, means all interfaces with ARPHRD_6LOWPAN, +must have "struct lowpan_priv" placed at beginning of netdev_priv. + +The priv_size of each interface should be calculate by:: + + dev->priv_size = LOWPAN_PRIV_SIZE(LL_6LOWPAN_PRIV_DATA); + +Where LL_PRIV_6LOWPAN_DATA is sizeof linklayer 6lowpan private data struct. +To access the LL_PRIV_6LOWPAN_DATA structure you can cast:: + + lowpan_priv(dev)-priv; + +to your LL_6LOWPAN_PRIV_DATA structure. + +Before registering the lowpan netdev interface you must run:: + + lowpan_netdev_setup(dev, LOWPAN_LLTYPE_FOOBAR); + +wheres LOWPAN_LLTYPE_FOOBAR is a define for your 6LoWPAN linklayer type of +enum lowpan_lltypes. + +Example to evaluate the private usually you can do:: + + static inline struct lowpan_priv_foobar * + lowpan_foobar_priv(struct net_device *dev) + { + return (struct lowpan_priv_foobar *)lowpan_priv(dev)->priv; + } + + switch (dev->type) { + case ARPHRD_6LOWPAN: + lowpan_priv = lowpan_priv(dev); + /* do great stuff which is ARPHRD_6LOWPAN related */ + switch (lowpan_priv->lltype) { + case LOWPAN_LLTYPE_FOOBAR: + /* do 802.15.4 6LoWPAN handling here */ + lowpan_foobar_priv(dev)->bar = foo; + break; + ... + } + break; + ... + } + +In case of generic 6lowpan branch ("net/6lowpan") you can remove the check +on ARPHRD_6LOWPAN, because you can be sure that these function are called +by ARPHRD_6LOWPAN interfaces. diff --git a/Documentation/networking/6lowpan.txt b/Documentation/networking/6lowpan.txt deleted file mode 100644 index 2e5a939d7e6f..000000000000 --- a/Documentation/networking/6lowpan.txt +++ /dev/null @@ -1,50 +0,0 @@ - -Netdev private dataroom for 6lowpan interfaces: - -All 6lowpan able net devices, means all interfaces with ARPHRD_6LOWPAN, -must have "struct lowpan_priv" placed at beginning of netdev_priv. - -The priv_size of each interface should be calculate by: - - dev->priv_size = LOWPAN_PRIV_SIZE(LL_6LOWPAN_PRIV_DATA); - -Where LL_PRIV_6LOWPAN_DATA is sizeof linklayer 6lowpan private data struct. -To access the LL_PRIV_6LOWPAN_DATA structure you can cast: - - lowpan_priv(dev)-priv; - -to your LL_6LOWPAN_PRIV_DATA structure. - -Before registering the lowpan netdev interface you must run: - - lowpan_netdev_setup(dev, LOWPAN_LLTYPE_FOOBAR); - -wheres LOWPAN_LLTYPE_FOOBAR is a define for your 6LoWPAN linklayer type of -enum lowpan_lltypes. - -Example to evaluate the private usually you can do: - -static inline struct lowpan_priv_foobar * -lowpan_foobar_priv(struct net_device *dev) -{ - return (struct lowpan_priv_foobar *)lowpan_priv(dev)->priv; -} - -switch (dev->type) { -case ARPHRD_6LOWPAN: - lowpan_priv = lowpan_priv(dev); - /* do great stuff which is ARPHRD_6LOWPAN related */ - switch (lowpan_priv->lltype) { - case LOWPAN_LLTYPE_FOOBAR: - /* do 802.15.4 6LoWPAN handling here */ - lowpan_foobar_priv(dev)->bar = foo; - break; - ... - } - break; -... -} - -In case of generic 6lowpan branch ("net/6lowpan") you can remove the check -on ARPHRD_6LOWPAN, because you can be sure that these function are called -by ARPHRD_6LOWPAN interfaces. diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index d07d9855dcd3..683bcbfbed75 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -33,6 +33,7 @@ Contents: tls tls-offload nfc + 6lowpan .. only:: subproject and html -- cgit v1.2.3 From 776ab6a3c2460404530345c270b90c51480c9473 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 24 Feb 2020 14:51:29 +0100 Subject: dt-bindings: reset: rcar-rst: Convert to json-schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the Renesas R-Car Reset Controller Device Tree binding documentation to json-schema. Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Signed-off-by: Rob Herring --- .../devicetree/bindings/reset/renesas,rst.txt | 48 ----------------- .../devicetree/bindings/reset/renesas,rst.yaml | 63 ++++++++++++++++++++++ 2 files changed, 63 insertions(+), 48 deletions(-) delete mode 100644 Documentation/devicetree/bindings/reset/renesas,rst.txt create mode 100644 Documentation/devicetree/bindings/reset/renesas,rst.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/reset/renesas,rst.txt b/Documentation/devicetree/bindings/reset/renesas,rst.txt deleted file mode 100644 index de7f06ccd003..000000000000 --- a/Documentation/devicetree/bindings/reset/renesas,rst.txt +++ /dev/null @@ -1,48 +0,0 @@ -DT bindings for the Renesas R-Car and RZ/G Reset Controllers - -The R-Car and RZ/G Reset Controllers provide reset control, and implement the -following functions: - - Latching of the levels on mode pins when PRESET# is negated, - - Mode monitoring register, - - Reset control of peripheral devices (on R-Car Gen1), - - Watchdog timer (on R-Car Gen1), - - Register-based reset control and boot address registers for the various CPU - cores (on R-Car Gen2 and Gen3, and on RZ/G). - - -Required properties: - - compatible: Should be - - "renesas,-reset-wdt" for R-Car Gen1, - - "renesas,-rst" for R-Car Gen2 and Gen3, and RZ/G - Examples with soctypes are: - - "renesas,r8a7743-rst" (RZ/G1M) - - "renesas,r8a7744-rst" (RZ/G1N) - - "renesas,r8a7745-rst" (RZ/G1E) - - "renesas,r8a77470-rst" (RZ/G1C) - - "renesas,r8a774a1-rst" (RZ/G2M) - - "renesas,r8a774b1-rst" (RZ/G2N) - - "renesas,r8a774c0-rst" (RZ/G2E) - - "renesas,r8a7778-reset-wdt" (R-Car M1A) - - "renesas,r8a7779-reset-wdt" (R-Car H1) - - "renesas,r8a7790-rst" (R-Car H2) - - "renesas,r8a7791-rst" (R-Car M2-W) - - "renesas,r8a7792-rst" (R-Car V2H - - "renesas,r8a7793-rst" (R-Car M2-N) - - "renesas,r8a7794-rst" (R-Car E2) - - "renesas,r8a7795-rst" (R-Car H3) - - "renesas,r8a7796-rst" (R-Car M3-W) - - "renesas,r8a77961-rst" (R-Car M3-W+) - - "renesas,r8a77965-rst" (R-Car M3-N) - - "renesas,r8a77970-rst" (R-Car V3M) - - "renesas,r8a77980-rst" (R-Car V3H) - - "renesas,r8a77990-rst" (R-Car E3) - - "renesas,r8a77995-rst" (R-Car D3) - - reg: Address start and address range for the device. - - -Example: - - rst: reset-controller@e6160000 { - compatible = "renesas,r8a7795-rst"; - reg = <0 0xe6160000 0 0x0200>; - }; diff --git a/Documentation/devicetree/bindings/reset/renesas,rst.yaml b/Documentation/devicetree/bindings/reset/renesas,rst.yaml new file mode 100644 index 000000000000..b5de1d196a13 --- /dev/null +++ b/Documentation/devicetree/bindings/reset/renesas,rst.yaml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/reset/renesas,rst.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Renesas R-Car and RZ/G Reset Controller + +maintainers: + - Geert Uytterhoeven + - Magnus Damm + +description: | + The R-Car and RZ/G Reset Controllers provide reset control, and implement the + following functions: + - Latching of the levels on mode pins when PRESET# is negated, + - Mode monitoring register, + - Reset control of peripheral devices (on R-Car Gen1), + - Watchdog timer (on R-Car Gen1), + - Register-based reset control and boot address registers for the various + CPU cores (on R-Car Gen2 and Gen3, and on RZ/G). + +properties: + compatible: + enum: + - renesas,r8a7743-rst # RZ/G1M + - renesas,r8a7744-rst # RZ/G1N + - renesas,r8a7745-rst # RZ/G1E + - renesas,r8a77470-rst # RZ/G1C + - renesas,r8a774a1-rst # RZ/G2M + - renesas,r8a774b1-rst # RZ/G2N + - renesas,r8a774c0-rst # RZ/G2E + - renesas,r8a7778-reset-wdt # R-Car M1A + - renesas,r8a7779-reset-wdt # R-Car H1 + - renesas,r8a7790-rst # R-Car H2 + - renesas,r8a7791-rst # R-Car M2-W + - renesas,r8a7792-rst # R-Car V2H + - renesas,r8a7793-rst # R-Car M2-N + - renesas,r8a7794-rst # R-Car E2 + - renesas,r8a7795-rst # R-Car H3 + - renesas,r8a7796-rst # R-Car M3-W + - renesas,r8a77961-rst # R-Car M3-W+ + - renesas,r8a77965-rst # R-Car M3-N + - renesas,r8a77970-rst # R-Car V3M + - renesas,r8a77980-rst # R-Car V3H + - renesas,r8a77990-rst # R-Car E3 + - renesas,r8a77995-rst # R-Car D3 + + reg: + maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + rst: reset-controller@e6160000 { + compatible = "renesas,r8a7795-rst"; + reg = <0xe6160000 0x0200>; + }; -- cgit v1.2.3 From 8db9684beb500a4a1ec50aeadfe30f2e7e44fa24 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 24 Feb 2020 14:52:34 +0100 Subject: dt-bindings: power: rcar-sysc: Convert to json-schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the Renesas R-Car System Controller Device Tree binding documentation to json-schema. Signed-off-by: Geert Uytterhoeven Reviewed-by: Niklas Söderlund Signed-off-by: Rob Herring --- .../bindings/power/renesas,rcar-sysc.txt | 62 ------------------ .../bindings/power/renesas,rcar-sysc.yaml | 73 ++++++++++++++++++++++ 2 files changed, 73 insertions(+), 62 deletions(-) delete mode 100644 Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt create mode 100644 Documentation/devicetree/bindings/power/renesas,rcar-sysc.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt b/Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt deleted file mode 100644 index acb41fade926..000000000000 --- a/Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt +++ /dev/null @@ -1,62 +0,0 @@ -DT bindings for the Renesas R-Car (RZ/G) System Controller - -== System Controller Node == - -The R-Car (RZ/G) System Controller provides power management for the CPU cores -and various coprocessors. - -Required properties: - - compatible: Must contain exactly one of the following: - - "renesas,r8a7743-sysc" (RZ/G1M) - - "renesas,r8a7744-sysc" (RZ/G1N) - - "renesas,r8a7745-sysc" (RZ/G1E) - - "renesas,r8a77470-sysc" (RZ/G1C) - - "renesas,r8a774a1-sysc" (RZ/G2M) - - "renesas,r8a774b1-sysc" (RZ/G2N) - - "renesas,r8a774c0-sysc" (RZ/G2E) - - "renesas,r8a7779-sysc" (R-Car H1) - - "renesas,r8a7790-sysc" (R-Car H2) - - "renesas,r8a7791-sysc" (R-Car M2-W) - - "renesas,r8a7792-sysc" (R-Car V2H) - - "renesas,r8a7793-sysc" (R-Car M2-N) - - "renesas,r8a7794-sysc" (R-Car E2) - - "renesas,r8a7795-sysc" (R-Car H3) - - "renesas,r8a7796-sysc" (R-Car M3-W) - - "renesas,r8a77961-sysc" (R-Car M3-W+) - - "renesas,r8a77965-sysc" (R-Car M3-N) - - "renesas,r8a77970-sysc" (R-Car V3M) - - "renesas,r8a77980-sysc" (R-Car V3H) - - "renesas,r8a77990-sysc" (R-Car E3) - - "renesas,r8a77995-sysc" (R-Car D3) - - reg: Address start and address range for the device. - - #power-domain-cells: Must be 1. - - -Example: - - sysc: system-controller@e6180000 { - compatible = "renesas,r8a7791-sysc"; - reg = <0 0xe6180000 0 0x0200>; - #power-domain-cells = <1>; - }; - - -== PM Domain Consumers == - -Devices residing in a power area must refer to that power area, as documented -by the generic PM domain bindings in -Documentation/devicetree/bindings/power/power_domain.txt. - -Required properties: - - power-domains: A phandle and symbolic PM domain specifier, as defined in - . - - -Example: - - L2_CA15: cache-controller@0 { - compatible = "cache"; - power-domains = <&sysc R8A7791_PD_CA15_SCU>; - cache-unified; - cache-level = <2>; - }; diff --git a/Documentation/devicetree/bindings/power/renesas,rcar-sysc.yaml b/Documentation/devicetree/bindings/power/renesas,rcar-sysc.yaml new file mode 100644 index 000000000000..e59331e1d944 --- /dev/null +++ b/Documentation/devicetree/bindings/power/renesas,rcar-sysc.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/power/renesas,rcar-sysc.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Renesas R-Car and RZ/G System Controller + +maintainers: + - Geert Uytterhoeven + - Magnus Damm + +description: + The R-Car (RZ/G) System Controller provides power management for the CPU + cores and various coprocessors. + +properties: + compatible: + enum: + - renesas,r8a7743-sysc # RZ/G1M + - renesas,r8a7744-sysc # RZ/G1N + - renesas,r8a7745-sysc # RZ/G1E + - renesas,r8a77470-sysc # RZ/G1C + - renesas,r8a774a1-sysc # RZ/G2M + - renesas,r8a774b1-sysc # RZ/G2N + - renesas,r8a774c0-sysc # RZ/G2E + - renesas,r8a7779-sysc # R-Car H1 + - renesas,r8a7790-sysc # R-Car H2 + - renesas,r8a7791-sysc # R-Car M2-W + - renesas,r8a7792-sysc # R-Car V2H + - renesas,r8a7793-sysc # R-Car M2-N + - renesas,r8a7794-sysc # R-Car E2 + - renesas,r8a7795-sysc # R-Car H3 + - renesas,r8a77961-sysc # R-Car M3-W+ + - renesas,r8a77965-sysc # R-Car M3-N + - renesas,r8a7796-sysc # R-Car M3-W + - renesas,r8a77970-sysc # R-Car V3M + - renesas,r8a77980-sysc # R-Car V3H + - renesas,r8a77990-sysc # R-Car E3 + - renesas,r8a77995-sysc # R-Car D3 + + reg: + maxItems: 1 + + '#power-domain-cells': + const: 1 + +required: + - compatible + - reg + - '#power-domain-cells' + +additionalProperties: false + +examples: + - | + // System Controller node + sysc: system-controller@e6180000 { + compatible = "renesas,r8a7791-sysc"; + reg = <0xe6180000 0x0200>; + #power-domain-cells = <1>; + }; + + - | + // Power Domain consumers + #include + + cache-controller-0 { + compatible = "cache"; + power-domains = <&sysc R8A7791_PD_CA15_SCU>; + cache-unified; + cache-level = <2>; + }; -- cgit v1.2.3 From c1507cf227824827135bc7df5522ce0b3c50be73 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 24 Feb 2020 14:52:51 +0100 Subject: dt-bindings: power: apmu: Convert to json-schema Convert the Renesas Advanced Power Management Unit Device Tree binding documentation to json-schema. Signed-off-by: Geert Uytterhoeven Signed-off-by: Rob Herring --- .../devicetree/bindings/power/renesas,apmu.txt | 35 -------------- .../devicetree/bindings/power/renesas,apmu.yaml | 55 ++++++++++++++++++++++ 2 files changed, 55 insertions(+), 35 deletions(-) delete mode 100644 Documentation/devicetree/bindings/power/renesas,apmu.txt create mode 100644 Documentation/devicetree/bindings/power/renesas,apmu.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/power/renesas,apmu.txt b/Documentation/devicetree/bindings/power/renesas,apmu.txt deleted file mode 100644 index 5f24586c8cf3..000000000000 --- a/Documentation/devicetree/bindings/power/renesas,apmu.txt +++ /dev/null @@ -1,35 +0,0 @@ -DT bindings for the Renesas Advanced Power Management Unit - -Renesas R-Car and RZ/G1 SoCs utilize one or more APMU hardware units -for CPU core power domain control including SMP boot and CPU Hotplug. - -Required properties: - -- compatible: Should be "renesas,-apmu", "renesas,apmu" as fallback. - Examples with soctypes are: - - "renesas,r8a7743-apmu" (RZ/G1M) - - "renesas,r8a7744-apmu" (RZ/G1N) - - "renesas,r8a7745-apmu" (RZ/G1E) - - "renesas,r8a77470-apmu" (RZ/G1C) - - "renesas,r8a7790-apmu" (R-Car H2) - - "renesas,r8a7791-apmu" (R-Car M2-W) - - "renesas,r8a7792-apmu" (R-Car V2H) - - "renesas,r8a7793-apmu" (R-Car M2-N) - - "renesas,r8a7794-apmu" (R-Car E2) - -- reg: Base address and length of the I/O registers used by the APMU. - -- cpus: This node contains a list of CPU cores, which should match the order - of CPU cores used by the WUPCR and PSTR registers in the Advanced Power - Management Unit section of the device's datasheet. - - -Example: - -This shows the r8a7791 APMU that can control CPU0 and CPU1. - - apmu@e6152000 { - compatible = "renesas,r8a7791-apmu", "renesas,apmu"; - reg = <0 0xe6152000 0 0x188>; - cpus = <&cpu0 &cpu1>; - }; diff --git a/Documentation/devicetree/bindings/power/renesas,apmu.yaml b/Documentation/devicetree/bindings/power/renesas,apmu.yaml new file mode 100644 index 000000000000..078b2cb40fe3 --- /dev/null +++ b/Documentation/devicetree/bindings/power/renesas,apmu.yaml @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/power/renesas,apmu.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Renesas Advanced Power Management Unit + +maintainers: + - Geert Uytterhoeven + - Magnus Damm + +description: + Renesas R-Car Gen2 and RZ/G1 SoCs utilize one or more APMU hardware units for + CPU core power domain control including SMP boot and CPU Hotplug. + +properties: + compatible: + items: + - enum: + - renesas,r8a7743-apmu # RZ/G1M + - renesas,r8a7744-apmu # RZ/G1N + - renesas,r8a7745-apmu # RZ/G1E + - renesas,r8a77470-apmu # RZ/G1C + - renesas,r8a7790-apmu # R-Car H2 + - renesas,r8a7791-apmu # R-Car M2-W + - renesas,r8a7792-apmu # R-Car V2H + - renesas,r8a7793-apmu # R-Car M2-N + - renesas,r8a7794-apmu # R-Car E2 + - const: renesas,apmu + + reg: + maxItems: 1 + + cpus: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: | + Array of phandles pointing to CPU cores, which should match the order of + CPU cores used by the WUPCR and PSTR registers in the Advanced Power + Management Unit section of the device's datasheet. + +required: + - compatible + - reg + - cpus + +additionalProperties: false + +examples: + - | + apmu@e6152000 { + compatible = "renesas,r8a7791-apmu", "renesas,apmu"; + reg = <0xe6152000 0x188>; + cpus = <&cpu0 &cpu1>; + }; -- cgit v1.2.3 From dc058092659c1147b943e694be8fadcdfcd97c1a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 23 Feb 2020 13:18:39 +0100 Subject: dt-bindings: Add vendor prefix for Hydis technologies This vendor has produced a number of display panels, including HVA40WV1. Cc: devicetree@vger.kernel.org Cc: Stephan Gerhold Signed-off-by: Linus Walleij Acked-by: Rob Herring Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200223121841.26836-1-linus.walleij@linaro.org --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index f70cf5f3e1b8..a2da166df1bc 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -425,6 +425,8 @@ patternProperties: description: Shenzhen Hugsun Technology Co. Ltd. "^hwacom,.*": description: HwaCom Systems Inc. + "^hydis,.*": + description: Hydis Technologies "^hyundai,.*": description: Hyundai Technology "^i2se,.*": -- cgit v1.2.3 From 5ce96fe032f43e92f5324a9b39c70de614efacaa Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 23 Feb 2020 13:18:40 +0100 Subject: drm/panel: Add DT bindings for Novatek NT35510-based panels This adds device tree bindings for the Novatek NT35510-based family of panels. Since several such panels are in existence we define bindings common for all, and define the compatible string for one certain panel (Hydis HVA40WV1). As other panels are discovered and investigated, we can add more compatibles to the binding using oneOf constructions. Cc: Stephan Gerhold Cc: devicetree@vger.kernel.org Signed-off-by: Linus Walleij Reviewed-by: Rob Herring Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20200223121841.26836-2-linus.walleij@linaro.org --- .../bindings/display/panel/novatek,nt35510.yaml | 56 ++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml b/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml new file mode 100644 index 000000000000..791fc9daa68b --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/novatek,nt35510.yaml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/novatek,nt35510.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Novatek NT35510-based display panels + +maintainers: + - Linus Walleij + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + items: + - const: hydis,hva40wv1 + - const: novatek,nt35510 + description: This indicates the panel manufacturer of the panel + that is in turn using the NT35510 panel driver. The compatible + string determines how the NT35510 panel driver shall be configured + to work with the indicated panel. The novatek,nt35510 compatible shall + always be provided as a fallback. + reg: true + reset-gpios: true + vdd-supply: + description: regulator that supplies the vdd voltage + vddi-supply: + description: regulator that supplies the vddi voltage + backlight: true + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + #include + + dsi@a0351000 { + #address-cells = <1>; + #size-cells = <0>; + panel { + compatible = "hydis,hva40wv1", "novatek,nt35510"; + reg = <0>; + vdd-supply = <&ab8500_ldo_aux4_reg>; + vddi-supply = <&ab8500_ldo_aux6_reg>; + reset-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>; + backlight = <&gpio_bl>; + }; + }; + +... -- cgit v1.2.3 From 2f8caa9f8f3254cd4527ec6a71428107623b3fa8 Mon Sep 17 00:00:00 2001 From: Rajeshwari Date: Mon, 6 Jan 2020 18:59:29 +0530 Subject: dt-bindings: thermal: tsens: Add configuration in yaml Added configuration in dt-bindings for SC7180. Signed-off-by: Rajeshwari Reviewed-by: Bjorn Andersson Acked-by: Rob Herring Reviewed-by: Amit Kucheria Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1578317369-16045-3-git-send-email-rkambl@codeaurora.org --- Documentation/devicetree/bindings/thermal/qcom-tsens.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml index eef13b9446a8..c0ed030d0960 100644 --- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml +++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml @@ -38,6 +38,7 @@ properties: - enum: - qcom,msm8996-tsens - qcom,msm8998-tsens + - qcom,sc7180-tsens - qcom,sdm845-tsens - const: qcom,tsens-v2 -- cgit v1.2.3 From 3aa31ced2f4d2c6a38d1f9ee134e4180b6482934 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 16 Feb 2020 19:15:09 +0100 Subject: dt-bindings: display: add panel-timing.yaml Add meta-schema variant of panel-timing and reference it from panel-common.yaml. Part of this came form other files with other licenses - original commits: commit cc3f414cf2e4 ("video: add of helper for display timings/videomode") commit 86f46565dff3 ("dt-bindings: display: display-timing: Add property to configure sync drive edge") commit 9cad9c95d7e8 ("Documentation: DocBook DRM framework documentation") The original authors acked the license change to: (GPL-2.0-only OR BSD-2-Clause) v2: - Got OK from original authors for re-license Huge thanks for the quick replies! - Typo fixes (Oleksandr) - Drop -array variant when not needed (Maxime) - Replace oneOf:... with enum (Maxime) - Drop type from clock-frequency (Rob) - Drop "|" when not needed (Rob) v3: - Added comment to acks that are only for the license change - Add yaml document terminator "..." - Updated description (removed reference to native-mode) Signed-off-by: Sam Ravnborg Acked-by: Laurent Pinchart [license change] Acked-by: Peter Ujfalusi [license change] Acked-by: Steffen Trumtrar [license change] Acked-by: Philipp Zabel [license change] Reviewed-by: Rob Herring Acked-by: Maxime Ripard Cc: Thierry Reding Cc: Oleksandr Suvorov Cc: Maxime Ripard Cc: devicetree@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200216181513.28109-2-sam@ravnborg.org --- .../bindings/display/panel/panel-common.yaml | 7 +- .../bindings/display/panel/panel-timing.yaml | 227 +++++++++++++++++++++ 2 files changed, 230 insertions(+), 4 deletions(-) create mode 100644 Documentation/devicetree/bindings/display/panel/panel-timing.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/panel-common.yaml b/Documentation/devicetree/bindings/display/panel/panel-common.yaml index ef8d8cdfcede..8070c439adbd 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-common.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-common.yaml @@ -54,13 +54,12 @@ properties: # Display Timings panel-timing: - type: object description: Most display panels are restricted to a single resolution and require specific display timings. The panel-timing subnode expresses those - timings as specified in the timing subnode section of the display timing - bindings defined in - Documentation/devicetree/bindings/display/panel/display-timing.txt. + timings. + allOf: + - $ref: panel-timing.yaml# # Connectivity port: diff --git a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml new file mode 100644 index 000000000000..bd558ad7891f --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml @@ -0,0 +1,227 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/panel-timing.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: panel timing bindings + +maintainers: + - Thierry Reding + - Sam Ravnborg + +description: | + There are different ways of describing the timing data of a panel. The + devicetree representation corresponds to the one commonly found in datasheets + for panels. + + The parameters are defined as seen in the following illustration. + + +----------+-------------------------------------+----------+-------+ + | | ^ | | | + | | |vback_porch | | | + | | v | | | + +----------#######################################----------+-------+ + | # ^ # | | + | # | # | | + | hback # | # hfront | hsync | + | porch # | hactive # porch | len | + |<-------->#<-------+--------------------------->#<-------->|<----->| + | # | # | | + | # |vactive # | | + | # | # | | + | # v # | | + +----------#######################################----------+-------+ + | | ^ | | | + | | |vfront_porch | | | + | | v | | | + +----------+-------------------------------------+----------+-------+ + | | ^ | | | + | | |vsync_len | | | + | | v | | | + +----------+-------------------------------------+----------+-------+ + + + The following is the panel timings shown with time on the x-axis. + This matches the timing diagrams often found in data sheets. + + Active Front Sync Back + Region Porch Porch + <-----------------------><----------------><-------------><--------------> + //////////////////////| + ////////////////////// | + ////////////////////// |.................. ................ + _______________ + + Timing can be specified either as a typical value or as a tuple + of min, typ, max values. + +properties: + + clock-frequency: + description: Panel clock in Hz + + hactive: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Horizontal panel resolution in pixels + + vactive: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Vertical panel resolution in pixels + + hfront-porch: + description: Horizontal front porch panel timing + oneOf: + - allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - maxItems: 1 + items: + description: typical number of pixels + - allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-array + - minItems: 3 + maxItems: 3 + items: + description: min, typ, max number of pixels + + hback-porch: + description: Horizontal back porch timing + oneOf: + - allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - maxItems: 1 + items: + description: typical number of pixels + - allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-array + - minItems: 3 + maxItems: 3 + items: + description: min, typ, max number of pixels + + hsync-len: + description: Horizontal sync length panel timing + oneOf: + - allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - maxItems: 1 + items: + description: typical number of pixels + - allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-array + - minItems: 3 + maxItems: 3 + items: + description: min, typ, max number of pixels + + vfront-porch: + description: Vertical front porch panel timing + oneOf: + - allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - maxItems: 1 + items: + description: typical number of lines + - allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-array + - minItems: 3 + maxItems: 3 + items: + description: min, typ, max number of lines + + vback-porch: + description: Vertical back porch panel timing + oneOf: + - allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - maxItems: 1 + items: + description: typical number of lines + - allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-array + - minItems: 3 + maxItems: 3 + items: + description: min, typ, max number of lines + + vsync-len: + description: Vertical sync length panel timing + oneOf: + - allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - maxItems: 1 + items: + description: typical number of lines + - allOf: + - $ref: /schemas/types.yaml#/definitions/uint32-array + - minItems: 3 + maxItems: 3 + items: + description: min, typ, max number of lines + + hsync-active: + description: | + Horizontal sync pulse. + 0 selects active low, 1 selects active high. + If omitted then it is not used by the hardware + enum: [0, 1] + + vsync-active: + description: | + Vertical sync pulse. + 0 selects active low, 1 selects active high. + If omitted then it is not used by the hardware + enum: [0, 1] + + de-active: + description: | + Data enable. + 0 selects active low, 1 selects active high. + If omitted then it is not used by the hardware + enum: [0, 1] + + pixelclk-active: + description: | + Data driving on rising or falling edge. + Use 0 to drive pixel data on falling edge and + sample data on rising edge. + Use 1 to drive pixel data on rising edge and + sample data on falling edge + enum: [0, 1] + + syncclk-active: + description: | + Drive sync on rising or sample sync on falling edge. + If not specified then the setup is as specified by pixelclk-active. + Use 0 to drive sync on falling edge and + sample sync on rising edge of pixel clock. + Use 1 to drive sync on rising edge and + sample sync on falling edge of pixel clock + enum: [0, 1] + + interlaced: + type: boolean + description: Enable interlaced mode + + doublescan: + type: boolean + description: Enable double scan mode + + doubleclk: + type: boolean + description: Enable double clock mode + +required: + - clock-frequency + - hactive + - vactive + - hfront-porch + - hback-porch + - hsync-len + - vfront-porch + - vback-porch + - vsync-len + +additionalProperties: false + +... -- cgit v1.2.3 From 4dd23a47d693c7f8d182fd8cc18081ff612c5271 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 16 Feb 2020 19:15:10 +0100 Subject: dt-bindings: display: convert display-timings to DT schema Add display-timings.yaml - that references panel-timings.yaml. display-timings.yaml will be used for display bindings when they are converted to meta-schema format. For now the old display-timing.txt points to the new display-timings.yaml - and all users are left as-is. v2: - Updated native-mode description v3: - Simpler "^timing" pattern (Rob) - timing node is of type object (Rob) - added display-timings to panel-common.yaml - added yaml document terminator "..." Signed-off-by: Sam Ravnborg Reviewed-by: Rob Herring Acked-by: Maxime Ripard Cc: Laurent Pinchart Cc: Thierry Reding Cc: Oleksandr Suvorov Cc: devicetree@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20200216181513.28109-3-sam@ravnborg.org --- .../bindings/display/panel/display-timing.txt | 124 +-------------------- .../bindings/display/panel/display-timings.yaml | 77 +++++++++++++ .../bindings/display/panel/panel-common.yaml | 8 ++ 3 files changed, 86 insertions(+), 123 deletions(-) create mode 100644 Documentation/devicetree/bindings/display/panel/display-timings.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/display-timing.txt b/Documentation/devicetree/bindings/display/panel/display-timing.txt index 78222ced1874..7f55ad4a40c4 100644 --- a/Documentation/devicetree/bindings/display/panel/display-timing.txt +++ b/Documentation/devicetree/bindings/display/panel/display-timing.txt @@ -1,123 +1 @@ -display-timing bindings -======================= - -display-timings node --------------------- - -required properties: - - none - -optional properties: - - native-mode: The native mode for the display, in case multiple modes are - provided. When omitted, assume the first node is the native. - -timing subnode --------------- - -required properties: - - hactive, vactive: display resolution - - hfront-porch, hback-porch, hsync-len: horizontal display timing parameters - in pixels - vfront-porch, vback-porch, vsync-len: vertical display timing parameters in - lines - - clock-frequency: display clock in Hz - -optional properties: - - hsync-active: hsync pulse is active low/high/ignored - - vsync-active: vsync pulse is active low/high/ignored - - de-active: data-enable pulse is active low/high/ignored - - pixelclk-active: with - - active high = drive pixel data on rising edge/ - sample data on falling edge - - active low = drive pixel data on falling edge/ - sample data on rising edge - - ignored = ignored - - syncclk-active: with - - active high = drive sync on rising edge/ - sample sync on falling edge of pixel - clock - - active low = drive sync on falling edge/ - sample sync on rising edge of pixel - clock - - omitted = same configuration as pixelclk-active - - interlaced (bool): boolean to enable interlaced mode - - doublescan (bool): boolean to enable doublescan mode - - doubleclk (bool): boolean to enable doubleclock mode - -All the optional properties that are not bool follow the following logic: - <1>: high active - <0>: low active - omitted: not used on hardware - -There are different ways of describing the capabilities of a display. The -devicetree representation corresponds to the one commonly found in datasheets -for displays. If a display supports multiple signal timings, the native-mode -can be specified. - -The parameters are defined as: - - +----------+-------------------------------------+----------+-------+ - | | ^ | | | - | | |vback_porch | | | - | | v | | | - +----------#######################################----------+-------+ - | # ^ # | | - | # | # | | - | hback # | # hfront | hsync | - | porch # | hactive # porch | len | - |<-------->#<-------+--------------------------->#<-------->|<----->| - | # | # | | - | # |vactive # | | - | # | # | | - | # v # | | - +----------#######################################----------+-------+ - | | ^ | | | - | | |vfront_porch | | | - | | v | | | - +----------+-------------------------------------+----------+-------+ - | | ^ | | | - | | |vsync_len | | | - | | v | | | - +----------+-------------------------------------+----------+-------+ - -Note: In addition to being used as subnode(s) of display-timings, the timing - subnode may also be used on its own. This is appropriate if only one mode - need be conveyed. In this case, the node should be named 'panel-timing'. - - -Example: - - display-timings { - native-mode = <&timing0>; - timing0: 1080p24 { - /* 1920x1080p24 */ - clock-frequency = <52000000>; - hactive = <1920>; - vactive = <1080>; - hfront-porch = <25>; - hback-porch = <25>; - hsync-len = <25>; - vback-porch = <2>; - vfront-porch = <2>; - vsync-len = <2>; - hsync-active = <1>; - }; - }; - -Every required property also supports the use of ranges, so the commonly used -datasheet description with minimum, typical and maximum values can be used. - -Example: - - timing1: timing { - /* 1920x1080p24 */ - clock-frequency = <148500000>; - hactive = <1920>; - vactive = <1080>; - hsync-len = <0 44 60>; - hfront-porch = <80 88 95>; - hback-porch = <100 148 160>; - vfront-porch = <0 4 6>; - vback-porch = <0 36 50>; - vsync-len = <0 5 6>; - }; +See display-timings.yaml in this directory. diff --git a/Documentation/devicetree/bindings/display/panel/display-timings.yaml b/Documentation/devicetree/bindings/display/panel/display-timings.yaml new file mode 100644 index 000000000000..c8c0c9cb0492 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/display-timings.yaml @@ -0,0 +1,77 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/display-timings.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: display timing bindings + +maintainers: + - Thierry Reding + - Laurent Pinchart + - Sam Ravnborg + +description: | + A display panel may be able to handle several display timings, + with different resolutions. + The display-timings node makes it possible to specify the timing + and to specify the timing that is native for the display. + +properties: + $nodename: + const: display-timings + + native-mode: + $ref: /schemas/types.yaml#/definitions/phandle + description: | + The default display timing is the one specified as native-mode. + If no native-mode is specified then the first node is assumed the + native mode. + +patternProperties: + "^timing": + type: object + allOf: + - $ref: panel-timing.yaml# + +additionalProperties: false + +examples: + - |+ + + /* + * Example that specifies panel timing using minimum, typical, + * maximum values as commonly used in datasheet description. + * timing1 is the native-mode. + */ + display-timings { + native-mode = <&timing1>; + timing0 { + /* 1920x1080p24 */ + clock-frequency = <148500000>; + hactive = <1920>; + vactive = <1080>; + hsync-len = <0 44 60>; + hfront-porch = <80 88 95>; + hback-porch = <100 148 160>; + vfront-porch = <0 4 6>; + vback-porch = <0 36 50>; + vsync-len = <0 5 6>; + }; + timing1 { + /* 1920x1080p24 */ + clock-frequency = <52000000>; + hactive = <1920>; + vactive = <1080>; + hfront-porch = <25>; + hback-porch = <25>; + hsync-len = <0 25 25>; + vback-porch = <2>; + vfront-porch = <2>; + vsync-len = <2>; + hsync-active = <1>; + pixelclk-active = <1>; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/display/panel/panel-common.yaml b/Documentation/devicetree/bindings/display/panel/panel-common.yaml index 8070c439adbd..ed051ba12084 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-common.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-common.yaml @@ -61,6 +61,14 @@ properties: allOf: - $ref: panel-timing.yaml# + display-timings: + description: + Some display panels supports several resolutions with different timing. + The display-timings bindings supports specifying several timings and + optional specify which is the native mode. + allOf: + - $ref: display-timings.yaml# + # Connectivity port: type: object -- cgit v1.2.3 From 4e840bea0cce68968a9542d67407554b6c45ad18 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 16 Feb 2020 19:15:11 +0100 Subject: dt-bindings: display: convert panel-dpi to DT schema With panel-timing converted, now convert the single remaining .txt user in panel/ of panel-timing to DT schema. v2: - Drop Thierry as maintainer, as this is not a general panel binding and I have no acks. - Drop requirement for a panel- specific binding - "panel-dpi" is enough - Updated example v3: - added yaml document terminator "..." - always require a specific binding - panel-dpi (based on feedback from Rob) - use "power-supply" for the supply property, and made it mandatory "power-supply" is the standard property for panels Signed-off-by: Sam Ravnborg Reviewed-by: Rob Herring Acked-by: Maxime Ripard Cc: Rob Herring Cc: Thierry Reding Cc: Laurent Pinchart Cc: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200216181513.28109-4-sam@ravnborg.org --- .../bindings/display/panel/panel-dpi.txt | 50 --------------- .../bindings/display/panel/panel-dpi.yaml | 72 ++++++++++++++++++++++ 2 files changed, 72 insertions(+), 50 deletions(-) delete mode 100644 Documentation/devicetree/bindings/display/panel/panel-dpi.txt create mode 100644 Documentation/devicetree/bindings/display/panel/panel-dpi.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/panel-dpi.txt b/Documentation/devicetree/bindings/display/panel/panel-dpi.txt deleted file mode 100644 index 6b203bc4d932..000000000000 --- a/Documentation/devicetree/bindings/display/panel/panel-dpi.txt +++ /dev/null @@ -1,50 +0,0 @@ -Generic MIPI DPI Panel -====================== - -Required properties: -- compatible: "panel-dpi" - -Optional properties: -- label: a symbolic name for the panel -- enable-gpios: panel enable gpio -- reset-gpios: GPIO to control the RESET pin -- vcc-supply: phandle of regulator that will be used to enable power to the display -- backlight: phandle of the backlight device - -Required nodes: -- "panel-timing" containing video timings - (Documentation/devicetree/bindings/display/panel/display-timing.txt) -- Video port for DPI input - -Example -------- - -lcd0: display@0 { - compatible = "samsung,lte430wq-f0c", "panel-dpi"; - label = "lcd"; - - backlight = <&backlight>; - - port { - lcd_in: endpoint { - remote-endpoint = <&dpi_out>; - }; - }; - - panel-timing { - clock-frequency = <9200000>; - hactive = <480>; - vactive = <272>; - hfront-porch = <8>; - hback-porch = <4>; - hsync-len = <41>; - vback-porch = <2>; - vfront-porch = <4>; - vsync-len = <10>; - - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <1>; - }; -}; diff --git a/Documentation/devicetree/bindings/display/panel/panel-dpi.yaml b/Documentation/devicetree/bindings/display/panel/panel-dpi.yaml new file mode 100644 index 000000000000..40079fc24a63 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/panel-dpi.yaml @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/panel-dpi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Generic MIPI DPI Panel + +maintainers: + - Sam Ravnborg + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + description: + Shall contain a panel specific compatible and "panel-dpi" + in that order. + items: + - {} + - const: panel-dpi + + backlight: true + enable-gpios: true + height-mm: true + label: true + panel-timing: true + port: true + power-supply: true + reset-gpios: true + width-mm: true + +required: + - panel-timing + - power-supply + +additionalProperties: false + +examples: + - | + panel@0 { + compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; + label = "osddisplay"; + power-supply = <&vcc_supply>; + + backlight = <&backlight>; + + port { + lcd_in: endpoint { + remote-endpoint = <&dpi_out>; + }; + }; + panel-timing { + clock-frequency = <9200000>; + hactive = <800>; + vactive = <480>; + hfront-porch = <8>; + hback-porch = <4>; + hsync-len = <41>; + vback-porch = <2>; + vfront-porch = <4>; + vsync-len = <10>; + + hsync-active = <0>; + vsync-active = <0>; + de-active = <1>; + pixelclk-active = <1>; + }; + }; + +... -- cgit v1.2.3 From fa10224087f1fa7cbd3ba0b46dedc1a1fec3d219 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 16 Feb 2020 19:15:12 +0100 Subject: dt-bindings: display: add data-mapping to panel-dpi Add data-mapping property that can be used to specify the media format used for the connection betwwen the display controller (connector) and the panel. v2: - drop lvds666 (Rob) Signed-off-by: Sam Ravnborg Reviewed-by: Rob Herring Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200216181513.28109-5-sam@ravnborg.org --- .../devicetree/bindings/display/panel/panel-dpi.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/display/panel/panel-dpi.yaml b/Documentation/devicetree/bindings/display/panel/panel-dpi.yaml index 40079fc24a63..5275d350f8cb 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-dpi.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-dpi.yaml @@ -21,6 +21,15 @@ properties: - {} - const: panel-dpi + data-mapping: + enum: + - rgb24 + - rgb565 + - bgr666 + description: | + Describes the media format, how the display panel is connected + to the display interface. + backlight: true enable-gpios: true height-mm: true @@ -43,7 +52,7 @@ examples: compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; label = "osddisplay"; power-supply = <&vcc_supply>; - + data-mapping = "rgb565"; backlight = <&backlight>; port { -- cgit v1.2.3 From 8c8b07cb08107abf11842ee7c29ba77ddc6ce568 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 18 Feb 2020 16:31:25 +0200 Subject: dt-bindings: dma: ti: k3-udma: Update for atype support (virtualization) In UDMA each channel can use different ATYPE value which tells UDMA how the addresses in the descriptors should be treated: 0: pointers are physical addresses (no translation) 1: pointers are intermediate addresses (PVU) 2: pointers are virtual addresses (SMMU) When virtualized environment is used then the dma binding should use additional cell to configure the desired ATYPE for the channel. Signed-off-by: Peter Ujfalusi Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20200218143126.11361-2-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/ti/k3-udma.yaml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml index 8b5c346f23f6..567bb820b182 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml @@ -45,7 +45,8 @@ allOf: properties: "#dma-cells": - const: 1 + minimum: 1 + maximum: 2 description: | The cell is the PSI-L thread ID of the remote (to UDMAP) end. Valid ranges for thread ID depends on the data movement direction: @@ -55,6 +56,8 @@ properties: Please refer to the device documentation for the PSI-L thread map and also the PSI-L peripheral chapter for the correct thread ID. + When #dma-cells is 2, the second parameter is the channel ATYPE. + compatible: enum: - ti,am654-navss-main-udmap @@ -131,6 +134,20 @@ required: - ti,sci-rm-range-rchan - ti,sci-rm-range-rflow +if: + properties: + "#dma-cells": + const: 2 +then: + properties: + ti,udma-atype: + description: ATYPE value which should be used by non slave channels + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + + required: + - ti,udma-atype + examples: - |+ cbass_main { -- cgit v1.2.3 From b9fb56b6ba8abc96484f007973ca00e30b104422 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Fri, 21 Feb 2020 16:52:29 +0900 Subject: dt-bindings: dmaengine: Add UniPhier external DMA controller bindings Add devicetree binding documentation for external DMA controller implemented on Socionext UniPhier SOCs. Signed-off-by: Kunihiko Hayashi Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/1582271550-3403-2-git-send-email-hayashi.kunihiko@socionext.com Signed-off-by: Vinod Koul --- .../bindings/dma/socionext,uniphier-xdmac.yaml | 63 ++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 Documentation/devicetree/bindings/dma/socionext,uniphier-xdmac.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/dma/socionext,uniphier-xdmac.yaml b/Documentation/devicetree/bindings/dma/socionext,uniphier-xdmac.yaml new file mode 100644 index 000000000000..86cfb599256e --- /dev/null +++ b/Documentation/devicetree/bindings/dma/socionext,uniphier-xdmac.yaml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/dma/socionext,uniphier-xdmac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Socionext UniPhier external DMA controller + +description: | + This describes the devicetree bindings for an external DMA engine to perform + memory-to-memory or peripheral-to-memory data transfer capable of supporting + 16 channels, implemented in Socionext UniPhier SoCs. + +maintainers: + - Kunihiko Hayashi + +allOf: + - $ref: "dma-controller.yaml#" + +properties: + compatible: + const: socionext,uniphier-xdmac + + reg: + items: + - description: XDMAC base register region (offset and length) + - description: XDMAC extension register region (offset and length) + + interrupts: + maxItems: 1 + + "#dma-cells": + const: 2 + description: | + DMA request from clients consists of 2 cells: + 1. Channel index + 2. Transfer request factor number, If no transfer factor, use 0. + The number is SoC-specific, and this should be specified with + relation to the device to use the DMA controller. + + dma-channels: + minimum: 1 + maximum: 16 + +additionalProperties: false + +required: + - compatible + - reg + - interrupts + - "#dma-cells" + +examples: + - | + xdmac: dma-controller@5fc10000 { + compatible = "socionext,uniphier-xdmac"; + reg = <0x5fc10000 0x1000>, <0x5fc20000 0x800>; + interrupts = <0 188 4>; + #dma-cells = <2>; + dma-channels = <16>; + }; + +... -- cgit v1.2.3 From ca408da08ad612cfc68a27d897d08d11da04d3e2 Mon Sep 17 00:00:00 2001 From: Mohammad Rasim Date: Fri, 14 Feb 2020 09:58:01 +0100 Subject: media: dt-bindings: media: add new kii pro key map Add new entry for rc-videostrong-kii-pro in linux,rc-map-name Signed-off-by: Mohammad Rasim Acked-by: Rob Herring Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- Documentation/devicetree/bindings/media/rc.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/rc.yaml b/Documentation/devicetree/bindings/media/rc.yaml index a64ee038d235..b27c9385d490 100644 --- a/Documentation/devicetree/bindings/media/rc.yaml +++ b/Documentation/devicetree/bindings/media/rc.yaml @@ -143,6 +143,7 @@ properties: - rc-videomate-k100 - rc-videomate-s350 - rc-videomate-tv-pvr + - rc-videostrong-kii-pro - rc-wetek-hub - rc-wetek-play2 - rc-winfast -- cgit v1.2.3 From 5e815fe05d0b3e4e4fcde2e09d5f362b3d65010f Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Thu, 20 Feb 2020 17:30:11 +0100 Subject: media: uapi: h264: Add DPB entry field reference flags Using the field information attached to v4l2 buffers is not enough to determine the type of field referenced by a DPB entry: the decoded frame might contain the full picture (both top and bottom fields) but the reference only point to one of them. Let's add new V4L2_H264_DPB_ENTRY_FLAG_ flags to express that. [Keep only 2 flags and add some details about they mean] Signed-off-by: Jonas Karlman Signed-off-by: Boris Brezillon Signed-off-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/uapi/v4l/ext-ctrls-codec.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'Documentation') diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst index 28313c0f4e7c..d4fc5f25aa14 100644 --- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst +++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst @@ -2028,6 +2028,22 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - * - ``V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM`` - 0x00000004 - The DPB entry is a long term reference frame + * - ``V4L2_H264_DPB_ENTRY_FLAG_FIELD`` + - 0x00000008 + - The DPB entry is a field reference, which means only one of the field + will be used when decoding the new frame/field. When not set the DPB + entry is a frame reference (both fields will be used). Note that this + flag does not say anything about the number of fields contained in the + reference frame, it just describes the one used to decode the new + field/frame + * - ``V4L2_H264_DPB_ENTRY_FLAG_BOTTOM_FIELD`` + - 0x00000010 + - The DPB entry is a bottom field reference (only the bottom field of the + reference frame is needed to decode the new frame/field). Only valid if + V4L2_H264_DPB_ENTRY_FLAG_FIELD is set. When + V4L2_H264_DPB_ENTRY_FLAG_FIELD is set but + V4L2_H264_DPB_ENTRY_FLAG_BOTTOM_FIELD is not, that means the + DPB entry is a top field reference ``V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE (enum)`` Specifies the decoding mode to use. Currently exposes slice-based and -- cgit v1.2.3 From 03b22496502dacb8a4f3b65e08fcde1d606cbff2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 27 Feb 2020 23:30:48 +0100 Subject: Documentation: cpufreq: Move legacy driver documentation There are three legacy driver documents in Documentation/cpu-freq/ that were added years ago and converting them each to the .rst format is rather pointless, even though there is some value in preserving them. However, if they are preserved, they need to go into the admin-guide part of cpufreq documentation where they belong (at least to a certain extent). To preserve them with minimum amount of changes and put them into the right place, and make it possible to process them into HTML (and other formats) along with the rest of the documentation, move them each as a "literal text" block into a separate section of a single .rst "wrapper" file under Documentation/admin-guide/pm/. While at it, repair the PCC specification URL in one of them. Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/cpufreq_drivers.rst | 274 +++++++++++++++++++++++ Documentation/admin-guide/pm/working-state.rst | 1 + Documentation/cpu-freq/amd-powernow.txt | 38 ---- Documentation/cpu-freq/cpufreq-nforce2.txt | 19 -- Documentation/cpu-freq/pcc-cpufreq.txt | 207 ----------------- 5 files changed, 275 insertions(+), 264 deletions(-) create mode 100644 Documentation/admin-guide/pm/cpufreq_drivers.rst delete mode 100644 Documentation/cpu-freq/amd-powernow.txt delete mode 100644 Documentation/cpu-freq/cpufreq-nforce2.txt delete mode 100644 Documentation/cpu-freq/pcc-cpufreq.txt (limited to 'Documentation') diff --git a/Documentation/admin-guide/pm/cpufreq_drivers.rst b/Documentation/admin-guide/pm/cpufreq_drivers.rst new file mode 100644 index 000000000000..9a134ae65803 --- /dev/null +++ b/Documentation/admin-guide/pm/cpufreq_drivers.rst @@ -0,0 +1,274 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================================= +Legacy Documentation of CPU Performance Scaling Drivers +======================================================= + +Included below are historic documents describing assorted +:doc:`CPU performance scaling ` drivers. They are reproduced verbatim, +with the original white space formatting and indentation preserved, except for +the added leading space character in every line of text. + + +AMD PowerNow! Drivers +===================== + +:: + + PowerNow! and Cool'n'Quiet are AMD names for frequency + management capabilities in AMD processors. As the hardware + implementation changes in new generations of the processors, + there is a different cpu-freq driver for each generation. + + Note that the driver's will not load on the "wrong" hardware, + so it is safe to try each driver in turn when in doubt as to + which is the correct driver. + + Note that the functionality to change frequency (and voltage) + is not available in all processors. The drivers will refuse + to load on processors without this capability. The capability + is detected with the cpuid instruction. + + The drivers use BIOS supplied tables to obtain frequency and + voltage information appropriate for a particular platform. + Frequency transitions will be unavailable if the BIOS does + not supply these tables. + + 6th Generation: powernow-k6 + + 7th Generation: powernow-k7: Athlon, Duron, Geode. + + 8th Generation: powernow-k8: Athlon, Athlon 64, Opteron, Sempron. + Documentation on this functionality in 8th generation processors + is available in the "BIOS and Kernel Developer's Guide", publication + 26094, in chapter 9, available for download from www.amd.com. + + BIOS supplied data, for powernow-k7 and for powernow-k8, may be + from either the PSB table or from ACPI objects. The ACPI support + is only available if the kernel config sets CONFIG_ACPI_PROCESSOR. + The powernow-k8 driver will attempt to use ACPI if so configured, + and fall back to PST if that fails. + The powernow-k7 driver will try to use the PSB support first, and + fall back to ACPI if the PSB support fails. A module parameter, + acpi_force, is provided to force ACPI support to be used instead + of PSB support. + + +``cpufreq-nforce2`` +=================== + +:: + + The cpufreq-nforce2 driver changes the FSB on nVidia nForce2 platforms. + + This works better than on other platforms, because the FSB of the CPU + can be controlled independently from the PCI/AGP clock. + + The module has two options: + + fid: multiplier * 10 (for example 8.5 = 85) + min_fsb: minimum FSB + + If not set, fid is calculated from the current CPU speed and the FSB. + min_fsb defaults to FSB at boot time - 50 MHz. + + IMPORTANT: The available range is limited downwards! + Also the minimum available FSB can differ, for systems + booting with 200 MHz, 150 should always work. + + +``pcc-cpufreq`` +=============== + +:: + + /* + * pcc-cpufreq.txt - PCC interface documentation + * + * Copyright (C) 2009 Red Hat, Matthew Garrett + * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. + * Nagananda Chumbalkar + */ + + + Processor Clocking Control Driver + --------------------------------- + + Contents: + --------- + 1. Introduction + 1.1 PCC interface + 1.1.1 Get Average Frequency + 1.1.2 Set Desired Frequency + 1.2 Platforms affected + 2. Driver and /sys details + 2.1 scaling_available_frequencies + 2.2 cpuinfo_transition_latency + 2.3 cpuinfo_cur_freq + 2.4 related_cpus + 3. Caveats + + 1. Introduction: + ---------------- + Processor Clocking Control (PCC) is an interface between the platform + firmware and OSPM. It is a mechanism for coordinating processor + performance (ie: frequency) between the platform firmware and the OS. + + The PCC driver (pcc-cpufreq) allows OSPM to take advantage of the PCC + interface. + + OS utilizes the PCC interface to inform platform firmware what frequency the + OS wants for a logical processor. The platform firmware attempts to achieve + the requested frequency. If the request for the target frequency could not be + satisfied by platform firmware, then it usually means that power budget + conditions are in place, and "power capping" is taking place. + + 1.1 PCC interface: + ------------------ + The complete PCC specification is available here: + https://acpica.org/sites/acpica/files/Processor-Clocking-Control-v1p0.pdf + + PCC relies on a shared memory region that provides a channel for communication + between the OS and platform firmware. PCC also implements a "doorbell" that + is used by the OS to inform the platform firmware that a command has been + sent. + + The ACPI PCCH() method is used to discover the location of the PCC shared + memory region. The shared memory region header contains the "command" and + "status" interface. PCCH() also contains details on how to access the platform + doorbell. + + The following commands are supported by the PCC interface: + * Get Average Frequency + * Set Desired Frequency + + The ACPI PCCP() method is implemented for each logical processor and is + used to discover the offsets for the input and output buffers in the shared + memory region. + + When PCC mode is enabled, the platform will not expose processor performance + or throttle states (_PSS, _TSS and related ACPI objects) to OSPM. Therefore, + the native P-state driver (such as acpi-cpufreq for Intel, powernow-k8 for + AMD) will not load. + + However, OSPM remains in control of policy. The governor (eg: "ondemand") + computes the required performance for each processor based on server workload. + The PCC driver fills in the command interface, and the input buffer and + communicates the request to the platform firmware. The platform firmware is + responsible for delivering the requested performance. + + Each PCC command is "global" in scope and can affect all the logical CPUs in + the system. Therefore, PCC is capable of performing "group" updates. With PCC + the OS is capable of getting/setting the frequency of all the logical CPUs in + the system with a single call to the BIOS. + + 1.1.1 Get Average Frequency: + ---------------------------- + This command is used by the OSPM to query the running frequency of the + processor since the last time this command was completed. The output buffer + indicates the average unhalted frequency of the logical processor expressed as + a percentage of the nominal (ie: maximum) CPU frequency. The output buffer + also signifies if the CPU frequency is limited by a power budget condition. + + 1.1.2 Set Desired Frequency: + ---------------------------- + This command is used by the OSPM to communicate to the platform firmware the + desired frequency for a logical processor. The output buffer is currently + ignored by OSPM. The next invocation of "Get Average Frequency" will inform + OSPM if the desired frequency was achieved or not. + + 1.2 Platforms affected: + ----------------------- + The PCC driver will load on any system where the platform firmware: + * supports the PCC interface, and the associated PCCH() and PCCP() methods + * assumes responsibility for managing the hardware clocking controls in order + to deliver the requested processor performance + + Currently, certain HP ProLiant platforms implement the PCC interface. On those + platforms PCC is the "default" choice. + + However, it is possible to disable this interface via a BIOS setting. In + such an instance, as is also the case on platforms where the PCC interface + is not implemented, the PCC driver will fail to load silently. + + 2. Driver and /sys details: + --------------------------- + When the driver loads, it merely prints the lowest and the highest CPU + frequencies supported by the platform firmware. + + The PCC driver loads with a message such as: + pcc-cpufreq: (v1.00.00) driver loaded with frequency limits: 1600 MHz, 2933 + MHz + + This means that the OPSM can request the CPU to run at any frequency in + between the limits (1600 MHz, and 2933 MHz) specified in the message. + + Internally, there is no need for the driver to convert the "target" frequency + to a corresponding P-state. + + The VERSION number for the driver will be of the format v.xy.ab. + eg: 1.00.02 + ----- -- + | | + | -- this will increase with bug fixes/enhancements to the driver + |-- this is the version of the PCC specification the driver adheres to + + + The following is a brief discussion on some of the fields exported via the + /sys filesystem and how their values are affected by the PCC driver: + + 2.1 scaling_available_frequencies: + ---------------------------------- + scaling_available_frequencies is not created in /sys. No intermediate + frequencies need to be listed because the BIOS will try to achieve any + frequency, within limits, requested by the governor. A frequency does not have + to be strictly associated with a P-state. + + 2.2 cpuinfo_transition_latency: + ------------------------------- + The cpuinfo_transition_latency field is 0. The PCC specification does + not include a field to expose this value currently. + + 2.3 cpuinfo_cur_freq: + --------------------- + A) Often cpuinfo_cur_freq will show a value different than what is declared + in the scaling_available_frequencies or scaling_cur_freq, or scaling_max_freq. + This is due to "turbo boost" available on recent Intel processors. If certain + conditions are met the BIOS can achieve a slightly higher speed than requested + by OSPM. An example: + + scaling_cur_freq : 2933000 + cpuinfo_cur_freq : 3196000 + + B) There is a round-off error associated with the cpuinfo_cur_freq value. + Since the driver obtains the current frequency as a "percentage" (%) of the + nominal frequency from the BIOS, sometimes, the values displayed by + scaling_cur_freq and cpuinfo_cur_freq may not match. An example: + + scaling_cur_freq : 1600000 + cpuinfo_cur_freq : 1583000 + + In this example, the nominal frequency is 2933 MHz. The driver obtains the + current frequency, cpuinfo_cur_freq, as 54% of the nominal frequency: + + 54% of 2933 MHz = 1583 MHz + + Nominal frequency is the maximum frequency of the processor, and it usually + corresponds to the frequency of the P0 P-state. + + 2.4 related_cpus: + ----------------- + The related_cpus field is identical to affected_cpus. + + affected_cpus : 4 + related_cpus : 4 + + Currently, the PCC driver does not evaluate _PSD. The platforms that support + PCC do not implement SW_ALL. So OSPM doesn't need to perform any coordination + to ensure that the same frequency is requested of all dependent CPUs. + + 3. Caveats: + ----------- + The "cpufreq_stats" module in its present form cannot be loaded and + expected to work with the PCC driver. Since the "cpufreq_stats" module + provides information wrt each P-state, it is not applicable to the PCC driver. diff --git a/Documentation/admin-guide/pm/working-state.rst b/Documentation/admin-guide/pm/working-state.rst index 88f717e59a42..0a38cdf39df1 100644 --- a/Documentation/admin-guide/pm/working-state.rst +++ b/Documentation/admin-guide/pm/working-state.rst @@ -11,4 +11,5 @@ Working-State Power Management intel_idle cpufreq intel_pstate + cpufreq_drivers intel_epb diff --git a/Documentation/cpu-freq/amd-powernow.txt b/Documentation/cpu-freq/amd-powernow.txt deleted file mode 100644 index 254da155fa47..000000000000 --- a/Documentation/cpu-freq/amd-powernow.txt +++ /dev/null @@ -1,38 +0,0 @@ - -PowerNow! and Cool'n'Quiet are AMD names for frequency -management capabilities in AMD processors. As the hardware -implementation changes in new generations of the processors, -there is a different cpu-freq driver for each generation. - -Note that the driver's will not load on the "wrong" hardware, -so it is safe to try each driver in turn when in doubt as to -which is the correct driver. - -Note that the functionality to change frequency (and voltage) -is not available in all processors. The drivers will refuse -to load on processors without this capability. The capability -is detected with the cpuid instruction. - -The drivers use BIOS supplied tables to obtain frequency and -voltage information appropriate for a particular platform. -Frequency transitions will be unavailable if the BIOS does -not supply these tables. - -6th Generation: powernow-k6 - -7th Generation: powernow-k7: Athlon, Duron, Geode. - -8th Generation: powernow-k8: Athlon, Athlon 64, Opteron, Sempron. -Documentation on this functionality in 8th generation processors -is available in the "BIOS and Kernel Developer's Guide", publication -26094, in chapter 9, available for download from www.amd.com. - -BIOS supplied data, for powernow-k7 and for powernow-k8, may be -from either the PSB table or from ACPI objects. The ACPI support -is only available if the kernel config sets CONFIG_ACPI_PROCESSOR. -The powernow-k8 driver will attempt to use ACPI if so configured, -and fall back to PST if that fails. -The powernow-k7 driver will try to use the PSB support first, and -fall back to ACPI if the PSB support fails. A module parameter, -acpi_force, is provided to force ACPI support to be used instead -of PSB support. diff --git a/Documentation/cpu-freq/cpufreq-nforce2.txt b/Documentation/cpu-freq/cpufreq-nforce2.txt deleted file mode 100644 index babce1315026..000000000000 --- a/Documentation/cpu-freq/cpufreq-nforce2.txt +++ /dev/null @@ -1,19 +0,0 @@ - -The cpufreq-nforce2 driver changes the FSB on nVidia nForce2 platforms. - -This works better than on other platforms, because the FSB of the CPU -can be controlled independently from the PCI/AGP clock. - -The module has two options: - - fid: multiplier * 10 (for example 8.5 = 85) - min_fsb: minimum FSB - -If not set, fid is calculated from the current CPU speed and the FSB. -min_fsb defaults to FSB at boot time - 50 MHz. - -IMPORTANT: The available range is limited downwards! - Also the minimum available FSB can differ, for systems - booting with 200 MHz, 150 should always work. - - diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt deleted file mode 100644 index 9e3c3b33514c..000000000000 --- a/Documentation/cpu-freq/pcc-cpufreq.txt +++ /dev/null @@ -1,207 +0,0 @@ -/* - * pcc-cpufreq.txt - PCC interface documentation - * - * Copyright (C) 2009 Red Hat, Matthew Garrett - * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. - * Nagananda Chumbalkar - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON - * INFRINGEMENT. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - * - * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - */ - - - Processor Clocking Control Driver - --------------------------------- - -Contents: ---------- -1. Introduction -1.1 PCC interface -1.1.1 Get Average Frequency -1.1.2 Set Desired Frequency -1.2 Platforms affected -2. Driver and /sys details -2.1 scaling_available_frequencies -2.2 cpuinfo_transition_latency -2.3 cpuinfo_cur_freq -2.4 related_cpus -3. Caveats - -1. Introduction: ----------------- -Processor Clocking Control (PCC) is an interface between the platform -firmware and OSPM. It is a mechanism for coordinating processor -performance (ie: frequency) between the platform firmware and the OS. - -The PCC driver (pcc-cpufreq) allows OSPM to take advantage of the PCC -interface. - -OS utilizes the PCC interface to inform platform firmware what frequency the -OS wants for a logical processor. The platform firmware attempts to achieve -the requested frequency. If the request for the target frequency could not be -satisfied by platform firmware, then it usually means that power budget -conditions are in place, and "power capping" is taking place. - -1.1 PCC interface: ------------------- -The complete PCC specification is available here: -http://www.acpica.org/download/Processor-Clocking-Control-v1p0.pdf - -PCC relies on a shared memory region that provides a channel for communication -between the OS and platform firmware. PCC also implements a "doorbell" that -is used by the OS to inform the platform firmware that a command has been -sent. - -The ACPI PCCH() method is used to discover the location of the PCC shared -memory region. The shared memory region header contains the "command" and -"status" interface. PCCH() also contains details on how to access the platform -doorbell. - -The following commands are supported by the PCC interface: -* Get Average Frequency -* Set Desired Frequency - -The ACPI PCCP() method is implemented for each logical processor and is -used to discover the offsets for the input and output buffers in the shared -memory region. - -When PCC mode is enabled, the platform will not expose processor performance -or throttle states (_PSS, _TSS and related ACPI objects) to OSPM. Therefore, -the native P-state driver (such as acpi-cpufreq for Intel, powernow-k8 for -AMD) will not load. - -However, OSPM remains in control of policy. The governor (eg: "ondemand") -computes the required performance for each processor based on server workload. -The PCC driver fills in the command interface, and the input buffer and -communicates the request to the platform firmware. The platform firmware is -responsible for delivering the requested performance. - -Each PCC command is "global" in scope and can affect all the logical CPUs in -the system. Therefore, PCC is capable of performing "group" updates. With PCC -the OS is capable of getting/setting the frequency of all the logical CPUs in -the system with a single call to the BIOS. - -1.1.1 Get Average Frequency: ----------------------------- -This command is used by the OSPM to query the running frequency of the -processor since the last time this command was completed. The output buffer -indicates the average unhalted frequency of the logical processor expressed as -a percentage of the nominal (ie: maximum) CPU frequency. The output buffer -also signifies if the CPU frequency is limited by a power budget condition. - -1.1.2 Set Desired Frequency: ----------------------------- -This command is used by the OSPM to communicate to the platform firmware the -desired frequency for a logical processor. The output buffer is currently -ignored by OSPM. The next invocation of "Get Average Frequency" will inform -OSPM if the desired frequency was achieved or not. - -1.2 Platforms affected: ------------------------ -The PCC driver will load on any system where the platform firmware: -* supports the PCC interface, and the associated PCCH() and PCCP() methods -* assumes responsibility for managing the hardware clocking controls in order -to deliver the requested processor performance - -Currently, certain HP ProLiant platforms implement the PCC interface. On those -platforms PCC is the "default" choice. - -However, it is possible to disable this interface via a BIOS setting. In -such an instance, as is also the case on platforms where the PCC interface -is not implemented, the PCC driver will fail to load silently. - -2. Driver and /sys details: ---------------------------- -When the driver loads, it merely prints the lowest and the highest CPU -frequencies supported by the platform firmware. - -The PCC driver loads with a message such as: -pcc-cpufreq: (v1.00.00) driver loaded with frequency limits: 1600 MHz, 2933 -MHz - -This means that the OPSM can request the CPU to run at any frequency in -between the limits (1600 MHz, and 2933 MHz) specified in the message. - -Internally, there is no need for the driver to convert the "target" frequency -to a corresponding P-state. - -The VERSION number for the driver will be of the format v.xy.ab. -eg: 1.00.02 - ----- -- - | | - | -- this will increase with bug fixes/enhancements to the driver - |-- this is the version of the PCC specification the driver adheres to - - -The following is a brief discussion on some of the fields exported via the -/sys filesystem and how their values are affected by the PCC driver: - -2.1 scaling_available_frequencies: ----------------------------------- -scaling_available_frequencies is not created in /sys. No intermediate -frequencies need to be listed because the BIOS will try to achieve any -frequency, within limits, requested by the governor. A frequency does not have -to be strictly associated with a P-state. - -2.2 cpuinfo_transition_latency: -------------------------------- -The cpuinfo_transition_latency field is 0. The PCC specification does -not include a field to expose this value currently. - -2.3 cpuinfo_cur_freq: ---------------------- -A) Often cpuinfo_cur_freq will show a value different than what is declared -in the scaling_available_frequencies or scaling_cur_freq, or scaling_max_freq. -This is due to "turbo boost" available on recent Intel processors. If certain -conditions are met the BIOS can achieve a slightly higher speed than requested -by OSPM. An example: - -scaling_cur_freq : 2933000 -cpuinfo_cur_freq : 3196000 - -B) There is a round-off error associated with the cpuinfo_cur_freq value. -Since the driver obtains the current frequency as a "percentage" (%) of the -nominal frequency from the BIOS, sometimes, the values displayed by -scaling_cur_freq and cpuinfo_cur_freq may not match. An example: - -scaling_cur_freq : 1600000 -cpuinfo_cur_freq : 1583000 - -In this example, the nominal frequency is 2933 MHz. The driver obtains the -current frequency, cpuinfo_cur_freq, as 54% of the nominal frequency: - - 54% of 2933 MHz = 1583 MHz - -Nominal frequency is the maximum frequency of the processor, and it usually -corresponds to the frequency of the P0 P-state. - -2.4 related_cpus: ------------------ -The related_cpus field is identical to affected_cpus. - -affected_cpus : 4 -related_cpus : 4 - -Currently, the PCC driver does not evaluate _PSD. The platforms that support -PCC do not implement SW_ALL. So OSPM doesn't need to perform any coordination -to ensure that the same frequency is requested of all dependent CPUs. - -3. Caveats: ------------ -The "cpufreq_stats" module in its present form cannot be loaded and -expected to work with the PCC driver. Since the "cpufreq_stats" module -provides information wrt each P-state, it is not applicable to the PCC driver. -- cgit v1.2.3 From 8fd390b89cc8ca786bc1b66d8a76512a6068b081 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Fri, 24 Jan 2020 21:35:43 +0100 Subject: media: Split v4l2_pipeline_pm_use into v4l2_pipeline_pm_{get, put} Currently, v4l2_pipeline_pm_use() prototype is: int v4l2_pipeline_pm_use(struct media_entity *entity, int use) Where the 'use' argument shall only be set to '1' for enable/power-on, or to '0' for disable/power-off. The integer return is specified as only meaningful when 'use' is set to '1'. Let's enforce this semantic by splitting the function in two: v4l2_pipeline_pm_get and v4l2_pipeline_pm_put. This is done for several reasons. It makes the API easier to use (or harder to misuse). It removes the constraint on the values the 'use' argument shall take. Also, it removes the need to constraint the return value, by making v4l2_pipeline_pm_put void return. And last, it's more consistent with other kernel APIs, such as the runtime pm APIs, which makes the code more symmetric. Signed-off-by: Ezequiel Garcia Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/kapi/csi2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/media/kapi/csi2.rst b/Documentation/media/kapi/csi2.rst index 030a5c41ec75..e111ff7bfd3d 100644 --- a/Documentation/media/kapi/csi2.rst +++ b/Documentation/media/kapi/csi2.rst @@ -74,7 +74,7 @@ Before the receiver driver may enable the CSI-2 transmitter by using the :c:type:`v4l2_subdev_video_ops`->s_stream(), it must have powered the transmitter up by using the :c:type:`v4l2_subdev_core_ops`->s_power() callback. This may take -place either indirectly by using :c:func:`v4l2_pipeline_pm_use` or +place either indirectly by using :c:func:`v4l2_pipeline_pm_get` or directly. Formats -- cgit v1.2.3 From 02fd2782fcfd6e69c6fd8fe1cd376208746f4f3f Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sat, 25 Jan 2020 00:20:11 +0100 Subject: media: dt-bindings: media: Add Allwinner A83T Rotate driver Some Allwinner SoCs like A83T and A64 contain rotate core which can rotate and flip images. Add a binding for it. Signed-off-by: Jernej Skrabec Reviewed-by: Rob Herring Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../media/allwinner,sun8i-a83t-de2-rotate.yaml | 70 ++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml b/Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml new file mode 100644 index 000000000000..75196d11da58 --- /dev/null +++ b/Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/media/allwinner,sun8i-a83t-de2-rotate.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Allwinner A83T DE2 Rotate Device Tree Bindings + +maintainers: + - Jernej Skrabec + - Chen-Yu Tsai + - Maxime Ripard + +description: |- + The Allwinner A83T and A64 have a rotation core used for + rotating and flipping images. + +properties: + compatible: + oneOf: + - const: allwinner,sun8i-a83t-de2-rotate + - items: + - const: allwinner,sun50i-a64-de2-rotate + - const: allwinner,sun8i-a83t-de2-rotate + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + items: + - description: Rotate interface clock + - description: Rotate module clock + + clock-names: + items: + - const: bus + - const: mod + + resets: + maxItems: 1 + +required: + - compatible + - reg + - interrupts + - clocks + +additionalProperties: false + +examples: + - | + #include + #include + #include + + rotate: rotate@1020000 { + compatible = "allwinner,sun8i-a83t-de2-rotate"; + reg = <0x1020000 0x10000>; + interrupts = ; + clocks = <&display_clocks CLK_BUS_ROT>, + <&display_clocks CLK_ROT>; + clock-names = "bus", + "mod"; + resets = <&display_clocks RST_ROT>; + }; + +... -- cgit v1.2.3 From 18e2565d7d154ddc7458e05cab67ef6f43b8f640 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Mon, 10 Feb 2020 18:06:55 +0100 Subject: media: dt-bindings: media: Add Allwinner A64 deinterlace compatible Allwinner A64 SoC also contains deinterlace core, compatible to H3. Add compatible string for it. Acked-by: Rob Herring Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml b/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml index 2e40f700e84f..8707df613f6c 100644 --- a/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml +++ b/Documentation/devicetree/bindings/media/allwinner,sun8i-h3-deinterlace.yaml @@ -17,7 +17,11 @@ description: |- properties: compatible: - const: allwinner,sun8i-h3-deinterlace + oneOf: + - const: allwinner,sun8i-h3-deinterlace + - items: + - const: allwinner,sun50i-a64-deinterlace + - const: allwinner,sun8i-h3-deinterlace reg: maxItems: 1 -- cgit v1.2.3 From 8bc8c9ba03c4d4966c2503e189801fbc9130b747 Mon Sep 17 00:00:00 2001 From: Vandana BN Date: Thu, 27 Feb 2020 06:39:09 +0100 Subject: media: Documentation:media:v4l-drivers: Update vivid documentation. Add metadata capture/output and v4l-touch support in vivid documentation. Signed-off-by: Vandana BN [hverkuil-cisco@xs4all.nl: fixed some typos, made some small tweaks of the text] Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/media/v4l-drivers/vivid.rst | 63 +++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/media/v4l-drivers/vivid.rst b/Documentation/media/v4l-drivers/vivid.rst index 7082fec4075d..52e57b773f07 100644 --- a/Documentation/media/v4l-drivers/vivid.rst +++ b/Documentation/media/v4l-drivers/vivid.rst @@ -4,9 +4,9 @@ The Virtual Video Test Driver (vivid) ===================================== This driver emulates video4linux hardware of various types: video capture, video -output, vbi capture and output, radio receivers and transmitters and a software -defined radio receiver. In addition a simple framebuffer device is available for -testing capture and output overlays. +output, vbi capture and output, metadata capture and output, radio receivers and +transmitters, touch capture and a software defined radio receiver. In addition a +simple framebuffer device is available for testing capture and output overlays. Up to 64 vivid instances can be created, each with up to 16 inputs and 16 outputs. @@ -36,6 +36,8 @@ This document describes the features implemented by this driver: - Radio receiver and transmitter support, including RDS support - Software defined radio (SDR) support - Capture and output overlay support +- Metadata capture and output support +- Touch capture support These features will be described in more detail below. @@ -69,6 +71,9 @@ all configurable using the following module options: - bit 10-11: VBI Output node: 0 = none, 1 = raw vbi, 2 = sliced vbi, 3 = both - bit 12: Radio Transmitter node - bit 16: Framebuffer for testing overlays + - bit 17: Metadata Capture node + - bit 18: Metadata Output node + - bit 19: Touch Capture node So to create four instances, the first two with just one video capture device, the second two with just one video output device you would pass @@ -175,6 +180,21 @@ all configurable using the following module options: give the desired swradioX start number for each SDR capture device. The default is -1 which will just take the first free number. +- meta_cap_nr: + + give the desired videoX start number for each metadata capture device. + The default is -1 which will just take the first free number. + +- meta_out_nr: + + give the desired videoX start number for each metadata output device. + The default is -1 which will just take the first free number. + +- touch_cap_nr: + + give the desired v4l-touchX start number for each touch capture device. + The default is -1 which will just take the first free number. + - ccs_cap_mode: specify the allowed video capture crop/compose/scaling combination @@ -547,6 +567,33 @@ The generated data contains the In-phase and Quadrature components of a 1 kHz tone that has an amplitude of sqrt(2). +Metadata Capture +---------------- + +The Metadata capture generates UVC format metadata. The PTS and SCR are +transmitted based on the values set in vivid contols. + +The Metadata device will only work for the Webcam input, it will give +back an error for all other inputs. + + +Metadata Output +--------------- + +The Metadata output can be used to set brightness, contrast, saturation and hue. + +The Metadata device will only work for the Webcam output, it will give +back an error for all other outputs. + + +Touch Capture +------------- + +The Touch capture generates touch patterns simulating single tap, double tap, +triple tap, move from left to right, zoom in, zoom out, palm press (simulating +a large area being pressed on a touchpad), and simulating 16 simultaneous +touch points. + Controls -------- @@ -1049,6 +1096,16 @@ FM Radio Modulator Controls to pass the RDS blocks to the driver, or "Controls" where the RDS data is Provided by the RDS controls mentioned above. +Metadata Capture Controls +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Generate PTS + + if set, then the generated metadata stream contains Presentation timestamp. + +- Generate SCR + + if set, then the generated metadata stream contains Source Clock information. Video, VBI and RDS Looping -------------------------- -- cgit v1.2.3 From 0a464ea4dc1248e8e640ae0f7eee90b99732eaf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sat, 29 Feb 2020 18:35:14 +0100 Subject: docs: dev-tools: gcov: Remove a stray single-quote MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonathan Neuschäfer Link: https://lore.kernel.org/r/20200229173515.13868-1-j.neuschaefer@gmx.net Signed-off-by: Jonathan Corbet --- Documentation/dev-tools/gcov.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/dev-tools/gcov.rst b/Documentation/dev-tools/gcov.rst index 46aae52a41d0..7bd013596217 100644 --- a/Documentation/dev-tools/gcov.rst +++ b/Documentation/dev-tools/gcov.rst @@ -203,7 +203,7 @@ Cause may not correctly copy files from sysfs. Solution - Use ``cat``' to read ``.gcda`` files and ``cp -d`` to copy links. + Use ``cat`` to read ``.gcda`` files and ``cp -d`` to copy links. Alternatively use the mechanism shown in Appendix B. -- cgit v1.2.3 From 7fe068dba8335ff0f9ec608db9589b1fce4663e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sat, 29 Feb 2020 14:27:48 +0100 Subject: docs: admin-guide: kernel-parameters: Document earlycon options for i.MX UARTs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/tty/serial/imx.c implements these earlycon options. Signed-off-by: Jonathan Neuschäfer Link: https://lore.kernel.org/r/20200229132750.2783-1-j.neuschaefer@gmx.net Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 56bf9b2a9ddf..3e3fd0d19e53 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1095,6 +1095,12 @@ A valid base address must be provided, and the serial port must already be setup and configured. + ec_imx21, + ec_imx6q, + Start an early, polled-mode, output-only console on the + Freescale i.MX UART at the specified address. The UART + must already be setup and configured. + ar3700_uart, Start an early, polled-mode console on the Armada 3700 serial port at the specified -- cgit v1.2.3 From adf3f38a87bbd8b8b69487988c7e8392d141f558 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 28 Feb 2020 21:41:45 +0100 Subject: docs: kernel-docs: Remove "Here is its" at the end of lines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before commit 9e03ea7f683e ("Documentation/kernel-docs.txt: convert it to ReST markup"), it read: Description: Linux Journal Kernel Korner article. Here is its abstract: "..." In Sphinx' HTML formatting, however, the "Here is its" doesn't make sense anymore, because the "Abstract:" is clearly separated. Signed-off-by: Jonathan Neuschäfer Link: https://lore.kernel.org/r/20200228204147.8622-1-j.neuschaefer@gmx.net Signed-off-by: Jonathan Corbet --- Documentation/process/kernel-docs.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/process/kernel-docs.rst b/Documentation/process/kernel-docs.rst index 7a45a8e36ea7..9d6d0ac4fca9 100644 --- a/Documentation/process/kernel-docs.rst +++ b/Documentation/process/kernel-docs.rst @@ -313,7 +313,7 @@ On-line docs :URL: http://www.linuxjournal.com/article.php?sid=2391 :Date: 1997 :Keywords: RAID, MD driver. - :Description: Linux Journal Kernel Korner article. Here is its + :Description: Linux Journal Kernel Korner article. :Abstract: *A description of the implementation of the RAID-1, RAID-4 and RAID-5 personalities of the MD device driver in the Linux kernel, providing users with high performance and reliable, @@ -338,7 +338,7 @@ On-line docs :Date: 1996 :Keywords: device driver, module, loading/unloading modules, allocating resources. - :Description: Linux Journal Kernel Korner article. Here is its + :Description: Linux Journal Kernel Korner article. :Abstract: *This is the first of a series of four articles co-authored by Alessandro Rubini and Georg Zezchwitz which present a practical approach to writing Linux device drivers as kernel @@ -354,7 +354,7 @@ On-line docs :Keywords: character driver, init_module, clean_up module, autodetection, mayor number, minor number, file operations, open(), close(). - :Description: Linux Journal Kernel Korner article. Here is its + :Description: Linux Journal Kernel Korner article. :Abstract: *This article, the second of four, introduces part of the actual code to create custom module implementing a character device driver. It describes the code for module initialization and @@ -367,7 +367,7 @@ On-line docs :Date: 1996 :Keywords: read(), write(), select(), ioctl(), blocking/non blocking mode, interrupt handler. - :Description: Linux Journal Kernel Korner article. Here is its + :Description: Linux Journal Kernel Korner article. :Abstract: *This article, the third of four on writing character device drivers, introduces concepts of reading, writing, and using ioctl-calls*. @@ -378,7 +378,7 @@ On-line docs :URL: http://www.linuxjournal.com/article.php?sid=1222 :Date: 1996 :Keywords: interrupts, irqs, DMA, bottom halves, task queues. - :Description: Linux Journal Kernel Korner article. Here is its + :Description: Linux Journal Kernel Korner article. :Abstract: *This is the fourth in a series of articles about writing character device drivers as loadable kernel modules. This month, we further investigate the field of interrupt handling. -- cgit v1.2.3 From d0c3bacb3e37488b00feb307c0ce43105a6fd23e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Feb 2020 16:06:49 -0800 Subject: doc: cgroup: improve formatting Fix tabs vs spaces issue which cases the line to be considered a new list entry. Signed-off-by: Jakub Kicinski Acked-by: Johannes Weiner Link: https://lore.kernel.org/r/20200228000653.1572553-2-kuba@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/cgroup-v2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 3f801461f0f3..723c8bd422cc 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1103,7 +1103,7 @@ PAGE_SIZE multiple when read back. proportionally to the overage, reducing reclaim pressure for smaller overages. - Effective min boundary is limited by memory.min values of + Effective min boundary is limited by memory.min values of all ancestor cgroups. If there is memory.min overcommitment (child cgroup or cgroups are requiring more protected memory than parent will allow), then each child cgroup will get -- cgit v1.2.3 From 2551cab59927e3b50f45f3f04f7ce0c9708eb5fb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Feb 2020 16:06:50 -0800 Subject: doc: cgroup: improve formatting of mem stats If there is an empty line between item and description Sphinx does not emphasize the item. First half of the list does not have the empty line and is emphasized correctly. Signed-off-by: Jakub Kicinski Acked-by: Johannes Weiner Link: https://lore.kernel.org/r/20200228000653.1572553-3-kuba@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/cgroup-v2.rst | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 723c8bd422cc..ab8b91014afb 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1313,53 +1313,41 @@ PAGE_SIZE multiple when read back. Number of major page faults incurred workingset_refault - Number of refaults of previously evicted pages workingset_activate - Number of refaulted pages that were immediately activated workingset_nodereclaim - Number of times a shadow node has been reclaimed pgrefill - Amount of scanned pages (in an active LRU list) pgscan - Amount of scanned pages (in an inactive LRU list) pgsteal - Amount of reclaimed pages pgactivate - Amount of pages moved to the active LRU list pgdeactivate - Amount of pages moved to the inactive LRU list pglazyfree - Amount of pages postponed to be freed under memory pressure pglazyfreed - Amount of reclaimed lazyfree pages thp_fault_alloc - Number of transparent hugepages which were allocated to satisfy a page fault, including COW faults. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE is not set. thp_collapse_alloc - Number of transparent hugepages which were allocated to allow collapsing an existing range of pages. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE is not set. -- cgit v1.2.3 From 69654d37cfa66bd0483f172d174180daf4527fea Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Feb 2020 16:06:51 -0800 Subject: doc: cgroup: improve formatting of io example We need a literal section, like few paragraphs below. Signed-off-by: Jakub Kicinski Acked-by: Johannes Weiner Link: https://lore.kernel.org/r/20200228000653.1572553-4-kuba@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/cgroup-v2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index ab8b91014afb..9d16fbc5df63 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1466,7 +1466,7 @@ IO Interface Files dios Number of discard IOs ====== ===================== - An example read output follows: + An example read output follows:: 8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353 dbytes=0 dios=0 8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 dbytes=50331648 dios=3021 -- cgit v1.2.3 From f3431ba715b5e7ecf6ae9634c0aa84305339e286 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Feb 2020 16:06:52 -0800 Subject: doc: cgroup: improve formatting of cpuset examples We need literal sections otherwise the entire example is rendered as a single line. Signed-off-by: Jakub Kicinski Acked-by: Johannes Weiner Link: https://lore.kernel.org/r/20200228000653.1572553-5-kuba@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/cgroup-v2.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 9d16fbc5df63..308d096af071 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1841,7 +1841,7 @@ Cpuset Interface Files from the requested CPUs. The CPU numbers are comma-separated numbers or ranges. - For example: + For example:: # cat cpuset.cpus 0-4,6,8-10 @@ -1880,7 +1880,7 @@ Cpuset Interface Files from the requested memory nodes. The memory node numbers are comma-separated numbers or ranges. - For example: + For example:: # cat cpuset.mems 0-1,3 -- cgit v1.2.3 From 373e8ffafd665ad114b96c547decce54b9621af4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Feb 2020 16:06:53 -0800 Subject: doc: cgroup: improve formatting of references Annotate references to other documents to make them clickable. Signed-off-by: Jakub Kicinski Acked-by: Johannes Weiner Link: https://lore.kernel.org/r/20200228000653.1572553-6-kuba@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/accounting/psi.rst | 2 ++ Documentation/admin-guide/cgroup-v1/index.rst | 2 ++ Documentation/admin-guide/cgroup-v2.rst | 8 ++++---- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst index 621111ce5740..f2b3439edcc2 100644 --- a/Documentation/accounting/psi.rst +++ b/Documentation/accounting/psi.rst @@ -1,3 +1,5 @@ +.. _psi: + ================================ PSI - Pressure Stall Information ================================ diff --git a/Documentation/admin-guide/cgroup-v1/index.rst b/Documentation/admin-guide/cgroup-v1/index.rst index 10bf48bae0b0..226f64473e8e 100644 --- a/Documentation/admin-guide/cgroup-v1/index.rst +++ b/Documentation/admin-guide/cgroup-v1/index.rst @@ -1,3 +1,5 @@ +.. _cgroup-v1: + ======================== Control Groups version 1 ======================== diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 308d096af071..fbb111616705 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -9,7 +9,7 @@ This is the authoritative documentation on the design, interface and conventions of cgroup v2. It describes all userland-visible aspects of cgroup including core and specific controller behaviors. All future changes must be reflected in this document. Documentation for -v1 is available under Documentation/admin-guide/cgroup-v1/. +v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst `. .. CONTENTS @@ -1023,7 +1023,7 @@ All time durations are in microseconds. A read-only nested-key file which exists on non-root cgroups. Shows pressure stall information for CPU. See - Documentation/accounting/psi.rst for details. + :ref:`Documentation/accounting/psi.rst ` for details. cpu.uclamp.min A read-write single value file which exists on non-root cgroups. @@ -1391,7 +1391,7 @@ PAGE_SIZE multiple when read back. A read-only nested-key file which exists on non-root cgroups. Shows pressure stall information for memory. See - Documentation/accounting/psi.rst for details. + :ref:`Documentation/accounting/psi.rst ` for details. Usage Guidelines @@ -1631,7 +1631,7 @@ IO Interface Files A read-only nested-key file which exists on non-root cgroups. Shows pressure stall information for IO. See - Documentation/accounting/psi.rst for details. + :ref:`Documentation/accounting/psi.rst ` for details. Writeback -- cgit v1.2.3 From 669a5cc8c5d997147a0551c809d0e5f795867341 Mon Sep 17 00:00:00 2001 From: Sameer Rahmani Date: Tue, 25 Feb 2020 22:21:24 +0000 Subject: Documentation: Converted the `kobject.txt` to rst format Reviewed and converted the `kobject.txt` format to rst in place. Signed-off-by: Sameer Rahmani Link: https://lore.kernel.org/r/20200225222125.61874-1-lxsameer@gnu.org Signed-off-by: Jonathan Corbet --- Documentation/kobject.txt | 78 +++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 39 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt index ff4c25098119..1f62d4d7d966 100644 --- a/Documentation/kobject.txt +++ b/Documentation/kobject.txt @@ -25,7 +25,7 @@ some terms we will be working with. usually embedded within some other structure which contains the stuff the code is really interested in. - No structure should EVER have more than one kobject embedded within it. + No structure should **EVER** have more than one kobject embedded within it. If it does, the reference counting for the object is sure to be messed up and incorrect, and your code will be buggy. So do not do this. @@ -55,7 +55,7 @@ a larger, domain-specific object. To this end, kobjects will be found embedded in other structures. If you are used to thinking of things in object-oriented terms, kobjects can be seen as a top-level, abstract class from which other classes are derived. A kobject implements a set of -capabilities which are not particularly useful by themselves, but which are +capabilities which are not particularly useful by themselves, but are nice to have in other objects. The C language does not allow for the direct expression of inheritance, so other techniques - such as structure embedding - must be used. @@ -65,12 +65,12 @@ this is analogous as to how "list_head" structs are rarely useful on their own, but are invariably found embedded in the larger objects of interest.) -So, for example, the UIO code in drivers/uio/uio.c has a structure that +So, for example, the UIO code in ``drivers/uio/uio.c`` has a structure that defines the memory region associated with a uio device:: struct uio_map { - struct kobject kobj; - struct uio_mem *mem; + struct kobject kobj; + struct uio_mem *mem; }; If you have a struct uio_map structure, finding its embedded kobject is @@ -78,30 +78,30 @@ just a matter of using the kobj member. Code that works with kobjects will often have the opposite problem, however: given a struct kobject pointer, what is the pointer to the containing structure? You must avoid tricks (such as assuming that the kobject is at the beginning of the structure) -and, instead, use the container_of() macro, found in :: +and, instead, use the container_of() macro, found in ````:: container_of(pointer, type, member) where: - * "pointer" is the pointer to the embedded kobject, - * "type" is the type of the containing structure, and - * "member" is the name of the structure field to which "pointer" points. + * ``pointer`` is the pointer to the embedded kobject, + * ``type`` is the type of the containing structure, and + * ``member`` is the name of the structure field to which ``pointer`` points. The return value from container_of() is a pointer to the corresponding -container type. So, for example, a pointer "kp" to a struct kobject -embedded *within* a struct uio_map could be converted to a pointer to the -*containing* uio_map structure with:: +container type. So, for example, a pointer ``kp`` to a struct kobject +embedded **within** a struct uio_map could be converted to a pointer to the +**containing** uio_map structure with:: struct uio_map *u_map = container_of(kp, struct uio_map, kobj); -For convenience, programmers often define a simple macro for "back-casting" +For convenience, programmers often define a simple macro for **back-casting** kobject pointers to the containing type. Exactly this happens in the -earlier drivers/uio/uio.c, as you can see here:: +earlier ``drivers/uio/uio.c``, as you can see here:: struct uio_map { - struct kobject kobj; - struct uio_mem *mem; + struct kobject kobj; + struct uio_mem *mem; }; #define to_map(map) container_of(map, struct uio_map, kobj) @@ -125,7 +125,7 @@ must have an associated kobj_type. After calling kobject_init(), to register the kobject with sysfs, the function kobject_add() must be called:: int kobject_add(struct kobject *kobj, struct kobject *parent, - const char *fmt, ...); + const char *fmt, ...); This sets up the parent of the kobject and the name for the kobject properly. If the kobject is to be associated with a specific kset, @@ -172,13 +172,13 @@ call to kobject_uevent():: int kobject_uevent(struct kobject *kobj, enum kobject_action action); -Use the KOBJ_ADD action for when the kobject is first added to the kernel. +Use the **KOBJ_ADD** action for when the kobject is first added to the kernel. This should be done only after any attributes or children of the kobject have been initialized properly, as userspace will instantly start to look for them when this call happens. When the kobject is removed from the kernel (details on how to do that are -below), the uevent for KOBJ_REMOVE will be automatically created by the +below), the uevent for **KOBJ_REMOVE** will be automatically created by the kobject core, so the caller does not have to worry about doing that by hand. @@ -238,7 +238,7 @@ Both types of attributes used here, with a kobject that has been created with the kobject_create_and_add(), can be of type kobj_attribute, so no special custom attribute is needed to be created. -See the example module, samples/kobject/kobject-example.c for an +See the example module, ``samples/kobject/kobject-example.c`` for an implementation of a simple kobject and attributes. @@ -270,10 +270,10 @@ such a method has a form like:: void my_object_release(struct kobject *kobj) { - struct my_object *mine = container_of(kobj, struct my_object, kobj); + struct my_object *mine = container_of(kobj, struct my_object, kobj); - /* Perform any additional cleanup on this object, then... */ - kfree(mine); + /* Perform any additional cleanup on this object, then... */ + kfree(mine); } One important point cannot be overstated: every kobject must have a @@ -297,11 +297,11 @@ instead, it is associated with the ktype. So let us introduce struct kobj_type:: struct kobj_type { - void (*release)(struct kobject *kobj); - const struct sysfs_ops *sysfs_ops; - struct attribute **default_attrs; - const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); - const void *(*namespace)(struct kobject *kobj); + void (*release)(struct kobject *kobj); + const struct sysfs_ops *sysfs_ops; + struct attribute **default_attrs; + const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); + const void *(*namespace)(struct kobject *kobj); }; This structure is used to describe a particular type of kobject (or, more @@ -352,8 +352,8 @@ created and never declared statically or on the stack. To create a new kset use:: struct kset *kset_create_and_add(const char *name, - struct kset_uevent_ops *u, - struct kobject *parent); + struct kset_uevent_ops *u, + struct kobject *parent); When you are finished with the kset, call:: @@ -365,16 +365,16 @@ Because other references to the kset may still exist, the release may happen after kset_unregister() returns. An example of using a kset can be seen in the -samples/kobject/kset-example.c file in the kernel tree. +``samples/kobject/kset-example.c`` file in the kernel tree. If a kset wishes to control the uevent operations of the kobjects associated with it, it can use the struct kset_uevent_ops to handle it:: struct kset_uevent_ops { - int (*filter)(struct kset *kset, struct kobject *kobj); - const char *(*name)(struct kset *kset, struct kobject *kobj); - int (*uevent)(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env); + int (*filter)(struct kset *kset, struct kobject *kobj); + const char *(*name)(struct kset *kset, struct kobject *kobj); + int (*uevent)(struct kset *kset, struct kobject *kobj, + struct kobj_uevent_env *env); }; @@ -408,8 +408,8 @@ Kobject removal After a kobject has been registered with the kobject core successfully, it must be cleaned up when the code is finished with it. To do that, call kobject_put(). By doing this, the kobject core will automatically clean up -all of the memory allocated by this kobject. If a KOBJ_ADD uevent has been -sent for the object, a corresponding KOBJ_REMOVE uevent will be sent, and +all of the memory allocated by this kobject. If a ``KOBJ_ADD`` uevent has been +sent for the object, a corresponding ``KOBJ_REMOVE`` uevent will be sent, and any other sysfs housekeeping will be handled for the caller properly. If you need to do a two-stage delete of the kobject (say you are not @@ -430,5 +430,5 @@ Example code to copy from ========================= For a more complete example of using ksets and kobjects properly, see the -example programs samples/kobject/{kobject-example.c,kset-example.c}, -which will be built as loadable modules if you select CONFIG_SAMPLE_KOBJECT. +example programs ``samples/kobject/{kobject-example.c,kset-example.c}``, +which will be built as loadable modules if you select ``CONFIG_SAMPLE_KOBJECT``. -- cgit v1.2.3 From 5fed00dcaca8bbd428742a6db1980753290eb204 Mon Sep 17 00:00:00 2001 From: Sameer Rahmani Date: Tue, 25 Feb 2020 22:21:25 +0000 Subject: Documentation: kobject.txt has been moved to core-api/kobject.rst Moved the `kobject.txt` to `core-api/kobject.rst` and updated the `core-api` index to point to it. Signed-off-by: Sameer Rahmani [jc: moved it down from the top of core-api/index.rst] Link: https://lore.kernel.org/r/20200225222125.61874-2-lxsameer@gnu.org Signed-off-by: Jonathan Corbet --- Documentation/core-api/index.rst | 1 + Documentation/core-api/kobject.rst | 434 +++++++++++++++++++++++++++++++++++++ Documentation/kobject.txt | 434 ------------------------------------- 3 files changed, 435 insertions(+), 434 deletions(-) create mode 100644 Documentation/core-api/kobject.rst delete mode 100644 Documentation/kobject.txt (limited to 'Documentation') diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index a501dc1c90d0..d02b26917931 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -12,6 +12,7 @@ Core utilities :maxdepth: 1 kernel-api + kobject assoc_array atomic_ops cachetlb diff --git a/Documentation/core-api/kobject.rst b/Documentation/core-api/kobject.rst new file mode 100644 index 000000000000..1f62d4d7d966 --- /dev/null +++ b/Documentation/core-api/kobject.rst @@ -0,0 +1,434 @@ +===================================================================== +Everything you never wanted to know about kobjects, ksets, and ktypes +===================================================================== + +:Author: Greg Kroah-Hartman +:Last updated: December 19, 2007 + +Based on an original article by Jon Corbet for lwn.net written October 1, +2003 and located at http://lwn.net/Articles/51437/ + +Part of the difficulty in understanding the driver model - and the kobject +abstraction upon which it is built - is that there is no obvious starting +place. Dealing with kobjects requires understanding a few different types, +all of which make reference to each other. In an attempt to make things +easier, we'll take a multi-pass approach, starting with vague terms and +adding detail as we go. To that end, here are some quick definitions of +some terms we will be working with. + + - A kobject is an object of type struct kobject. Kobjects have a name + and a reference count. A kobject also has a parent pointer (allowing + objects to be arranged into hierarchies), a specific type, and, + usually, a representation in the sysfs virtual filesystem. + + Kobjects are generally not interesting on their own; instead, they are + usually embedded within some other structure which contains the stuff + the code is really interested in. + + No structure should **EVER** have more than one kobject embedded within it. + If it does, the reference counting for the object is sure to be messed + up and incorrect, and your code will be buggy. So do not do this. + + - A ktype is the type of object that embeds a kobject. Every structure + that embeds a kobject needs a corresponding ktype. The ktype controls + what happens to the kobject when it is created and destroyed. + + - A kset is a group of kobjects. These kobjects can be of the same ktype + or belong to different ktypes. The kset is the basic container type for + collections of kobjects. Ksets contain their own kobjects, but you can + safely ignore that implementation detail as the kset core code handles + this kobject automatically. + + When you see a sysfs directory full of other directories, generally each + of those directories corresponds to a kobject in the same kset. + +We'll look at how to create and manipulate all of these types. A bottom-up +approach will be taken, so we'll go back to kobjects. + + +Embedding kobjects +================== + +It is rare for kernel code to create a standalone kobject, with one major +exception explained below. Instead, kobjects are used to control access to +a larger, domain-specific object. To this end, kobjects will be found +embedded in other structures. If you are used to thinking of things in +object-oriented terms, kobjects can be seen as a top-level, abstract class +from which other classes are derived. A kobject implements a set of +capabilities which are not particularly useful by themselves, but are +nice to have in other objects. The C language does not allow for the +direct expression of inheritance, so other techniques - such as structure +embedding - must be used. + +(As an aside, for those familiar with the kernel linked list implementation, +this is analogous as to how "list_head" structs are rarely useful on +their own, but are invariably found embedded in the larger objects of +interest.) + +So, for example, the UIO code in ``drivers/uio/uio.c`` has a structure that +defines the memory region associated with a uio device:: + + struct uio_map { + struct kobject kobj; + struct uio_mem *mem; + }; + +If you have a struct uio_map structure, finding its embedded kobject is +just a matter of using the kobj member. Code that works with kobjects will +often have the opposite problem, however: given a struct kobject pointer, +what is the pointer to the containing structure? You must avoid tricks +(such as assuming that the kobject is at the beginning of the structure) +and, instead, use the container_of() macro, found in ````:: + + container_of(pointer, type, member) + +where: + + * ``pointer`` is the pointer to the embedded kobject, + * ``type`` is the type of the containing structure, and + * ``member`` is the name of the structure field to which ``pointer`` points. + +The return value from container_of() is a pointer to the corresponding +container type. So, for example, a pointer ``kp`` to a struct kobject +embedded **within** a struct uio_map could be converted to a pointer to the +**containing** uio_map structure with:: + + struct uio_map *u_map = container_of(kp, struct uio_map, kobj); + +For convenience, programmers often define a simple macro for **back-casting** +kobject pointers to the containing type. Exactly this happens in the +earlier ``drivers/uio/uio.c``, as you can see here:: + + struct uio_map { + struct kobject kobj; + struct uio_mem *mem; + }; + + #define to_map(map) container_of(map, struct uio_map, kobj) + +where the macro argument "map" is a pointer to the struct kobject in +question. That macro is subsequently invoked with:: + + struct uio_map *map = to_map(kobj); + + +Initialization of kobjects +========================== + +Code which creates a kobject must, of course, initialize that object. Some +of the internal fields are setup with a (mandatory) call to kobject_init():: + + void kobject_init(struct kobject *kobj, struct kobj_type *ktype); + +The ktype is required for a kobject to be created properly, as every kobject +must have an associated kobj_type. After calling kobject_init(), to +register the kobject with sysfs, the function kobject_add() must be called:: + + int kobject_add(struct kobject *kobj, struct kobject *parent, + const char *fmt, ...); + +This sets up the parent of the kobject and the name for the kobject +properly. If the kobject is to be associated with a specific kset, +kobj->kset must be assigned before calling kobject_add(). If a kset is +associated with a kobject, then the parent for the kobject can be set to +NULL in the call to kobject_add() and then the kobject's parent will be the +kset itself. + +As the name of the kobject is set when it is added to the kernel, the name +of the kobject should never be manipulated directly. If you must change +the name of the kobject, call kobject_rename():: + + int kobject_rename(struct kobject *kobj, const char *new_name); + +kobject_rename does not perform any locking or have a solid notion of +what names are valid so the caller must provide their own sanity checking +and serialization. + +There is a function called kobject_set_name() but that is legacy cruft and +is being removed. If your code needs to call this function, it is +incorrect and needs to be fixed. + +To properly access the name of the kobject, use the function +kobject_name():: + + const char *kobject_name(const struct kobject * kobj); + +There is a helper function to both initialize and add the kobject to the +kernel at the same time, called surprisingly enough kobject_init_and_add():: + + int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, + struct kobject *parent, const char *fmt, ...); + +The arguments are the same as the individual kobject_init() and +kobject_add() functions described above. + + +Uevents +======= + +After a kobject has been registered with the kobject core, you need to +announce to the world that it has been created. This can be done with a +call to kobject_uevent():: + + int kobject_uevent(struct kobject *kobj, enum kobject_action action); + +Use the **KOBJ_ADD** action for when the kobject is first added to the kernel. +This should be done only after any attributes or children of the kobject +have been initialized properly, as userspace will instantly start to look +for them when this call happens. + +When the kobject is removed from the kernel (details on how to do that are +below), the uevent for **KOBJ_REMOVE** will be automatically created by the +kobject core, so the caller does not have to worry about doing that by +hand. + + +Reference counts +================ + +One of the key functions of a kobject is to serve as a reference counter +for the object in which it is embedded. As long as references to the object +exist, the object (and the code which supports it) must continue to exist. +The low-level functions for manipulating a kobject's reference counts are:: + + struct kobject *kobject_get(struct kobject *kobj); + void kobject_put(struct kobject *kobj); + +A successful call to kobject_get() will increment the kobject's reference +counter and return the pointer to the kobject. + +When a reference is released, the call to kobject_put() will decrement the +reference count and, possibly, free the object. Note that kobject_init() +sets the reference count to one, so the code which sets up the kobject will +need to do a kobject_put() eventually to release that reference. + +Because kobjects are dynamic, they must not be declared statically or on +the stack, but instead, always allocated dynamically. Future versions of +the kernel will contain a run-time check for kobjects that are created +statically and will warn the developer of this improper usage. + +If all that you want to use a kobject for is to provide a reference counter +for your structure, please use the struct kref instead; a kobject would be +overkill. For more information on how to use struct kref, please see the +file Documentation/kref.txt in the Linux kernel source tree. + + +Creating "simple" kobjects +========================== + +Sometimes all that a developer wants is a way to create a simple directory +in the sysfs hierarchy, and not have to mess with the whole complication of +ksets, show and store functions, and other details. This is the one +exception where a single kobject should be created. To create such an +entry, use the function:: + + struct kobject *kobject_create_and_add(char *name, struct kobject *parent); + +This function will create a kobject and place it in sysfs in the location +underneath the specified parent kobject. To create simple attributes +associated with this kobject, use:: + + int sysfs_create_file(struct kobject *kobj, struct attribute *attr); + +or:: + + int sysfs_create_group(struct kobject *kobj, struct attribute_group *grp); + +Both types of attributes used here, with a kobject that has been created +with the kobject_create_and_add(), can be of type kobj_attribute, so no +special custom attribute is needed to be created. + +See the example module, ``samples/kobject/kobject-example.c`` for an +implementation of a simple kobject and attributes. + + + +ktypes and release methods +========================== + +One important thing still missing from the discussion is what happens to a +kobject when its reference count reaches zero. The code which created the +kobject generally does not know when that will happen; if it did, there +would be little point in using a kobject in the first place. Even +predictable object lifecycles become more complicated when sysfs is brought +in as other portions of the kernel can get a reference on any kobject that +is registered in the system. + +The end result is that a structure protected by a kobject cannot be freed +before its reference count goes to zero. The reference count is not under +the direct control of the code which created the kobject. So that code must +be notified asynchronously whenever the last reference to one of its +kobjects goes away. + +Once you registered your kobject via kobject_add(), you must never use +kfree() to free it directly. The only safe way is to use kobject_put(). It +is good practice to always use kobject_put() after kobject_init() to avoid +errors creeping in. + +This notification is done through a kobject's release() method. Usually +such a method has a form like:: + + void my_object_release(struct kobject *kobj) + { + struct my_object *mine = container_of(kobj, struct my_object, kobj); + + /* Perform any additional cleanup on this object, then... */ + kfree(mine); + } + +One important point cannot be overstated: every kobject must have a +release() method, and the kobject must persist (in a consistent state) +until that method is called. If these constraints are not met, the code is +flawed. Note that the kernel will warn you if you forget to provide a +release() method. Do not try to get rid of this warning by providing an +"empty" release function. + +If all your cleanup function needs to do is call kfree(), then you must +create a wrapper function which uses container_of() to upcast to the correct +type (as shown in the example above) and then calls kfree() on the overall +structure. + +Note, the name of the kobject is available in the release function, but it +must NOT be changed within this callback. Otherwise there will be a memory +leak in the kobject core, which makes people unhappy. + +Interestingly, the release() method is not stored in the kobject itself; +instead, it is associated with the ktype. So let us introduce struct +kobj_type:: + + struct kobj_type { + void (*release)(struct kobject *kobj); + const struct sysfs_ops *sysfs_ops; + struct attribute **default_attrs; + const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); + const void *(*namespace)(struct kobject *kobj); + }; + +This structure is used to describe a particular type of kobject (or, more +correctly, of containing object). Every kobject needs to have an associated +kobj_type structure; a pointer to that structure must be specified when you +call kobject_init() or kobject_init_and_add(). + +The release field in struct kobj_type is, of course, a pointer to the +release() method for this type of kobject. The other two fields (sysfs_ops +and default_attrs) control how objects of this type are represented in +sysfs; they are beyond the scope of this document. + +The default_attrs pointer is a list of default attributes that will be +automatically created for any kobject that is registered with this ktype. + + +ksets +===== + +A kset is merely a collection of kobjects that want to be associated with +each other. There is no restriction that they be of the same ktype, but be +very careful if they are not. + +A kset serves these functions: + + - It serves as a bag containing a group of objects. A kset can be used by + the kernel to track "all block devices" or "all PCI device drivers." + + - A kset is also a subdirectory in sysfs, where the associated kobjects + with the kset can show up. Every kset contains a kobject which can be + set up to be the parent of other kobjects; the top-level directories of + the sysfs hierarchy are constructed in this way. + + - Ksets can support the "hotplugging" of kobjects and influence how + uevent events are reported to user space. + +In object-oriented terms, "kset" is the top-level container class; ksets +contain their own kobject, but that kobject is managed by the kset code and +should not be manipulated by any other user. + +A kset keeps its children in a standard kernel linked list. Kobjects point +back to their containing kset via their kset field. In almost all cases, +the kobjects belonging to a kset have that kset (or, strictly, its embedded +kobject) in their parent. + +As a kset contains a kobject within it, it should always be dynamically +created and never declared statically or on the stack. To create a new +kset use:: + + struct kset *kset_create_and_add(const char *name, + struct kset_uevent_ops *u, + struct kobject *parent); + +When you are finished with the kset, call:: + + void kset_unregister(struct kset *kset); + +to destroy it. This removes the kset from sysfs and decrements its reference +count. When the reference count goes to zero, the kset will be released. +Because other references to the kset may still exist, the release may happen +after kset_unregister() returns. + +An example of using a kset can be seen in the +``samples/kobject/kset-example.c`` file in the kernel tree. + +If a kset wishes to control the uevent operations of the kobjects +associated with it, it can use the struct kset_uevent_ops to handle it:: + + struct kset_uevent_ops { + int (*filter)(struct kset *kset, struct kobject *kobj); + const char *(*name)(struct kset *kset, struct kobject *kobj); + int (*uevent)(struct kset *kset, struct kobject *kobj, + struct kobj_uevent_env *env); + }; + + +The filter function allows a kset to prevent a uevent from being emitted to +userspace for a specific kobject. If the function returns 0, the uevent +will not be emitted. + +The name function will be called to override the default name of the kset +that the uevent sends to userspace. By default, the name will be the same +as the kset itself, but this function, if present, can override that name. + +The uevent function will be called when the uevent is about to be sent to +userspace to allow more environment variables to be added to the uevent. + +One might ask how, exactly, a kobject is added to a kset, given that no +functions which perform that function have been presented. The answer is +that this task is handled by kobject_add(). When a kobject is passed to +kobject_add(), its kset member should point to the kset to which the +kobject will belong. kobject_add() will handle the rest. + +If the kobject belonging to a kset has no parent kobject set, it will be +added to the kset's directory. Not all members of a kset do necessarily +live in the kset directory. If an explicit parent kobject is assigned +before the kobject is added, the kobject is registered with the kset, but +added below the parent kobject. + + +Kobject removal +=============== + +After a kobject has been registered with the kobject core successfully, it +must be cleaned up when the code is finished with it. To do that, call +kobject_put(). By doing this, the kobject core will automatically clean up +all of the memory allocated by this kobject. If a ``KOBJ_ADD`` uevent has been +sent for the object, a corresponding ``KOBJ_REMOVE`` uevent will be sent, and +any other sysfs housekeeping will be handled for the caller properly. + +If you need to do a two-stage delete of the kobject (say you are not +allowed to sleep when you need to destroy the object), then call +kobject_del() which will unregister the kobject from sysfs. This makes the +kobject "invisible", but it is not cleaned up, and the reference count of +the object is still the same. At a later time call kobject_put() to finish +the cleanup of the memory associated with the kobject. + +kobject_del() can be used to drop the reference to the parent object, if +circular references are constructed. It is valid in some cases, that a +parent objects references a child. Circular references _must_ be broken +with an explicit call to kobject_del(), so that a release functions will be +called, and the objects in the former circle release each other. + + +Example code to copy from +========================= + +For a more complete example of using ksets and kobjects properly, see the +example programs ``samples/kobject/{kobject-example.c,kset-example.c}``, +which will be built as loadable modules if you select ``CONFIG_SAMPLE_KOBJECT``. diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt deleted file mode 100644 index 1f62d4d7d966..000000000000 --- a/Documentation/kobject.txt +++ /dev/null @@ -1,434 +0,0 @@ -===================================================================== -Everything you never wanted to know about kobjects, ksets, and ktypes -===================================================================== - -:Author: Greg Kroah-Hartman -:Last updated: December 19, 2007 - -Based on an original article by Jon Corbet for lwn.net written October 1, -2003 and located at http://lwn.net/Articles/51437/ - -Part of the difficulty in understanding the driver model - and the kobject -abstraction upon which it is built - is that there is no obvious starting -place. Dealing with kobjects requires understanding a few different types, -all of which make reference to each other. In an attempt to make things -easier, we'll take a multi-pass approach, starting with vague terms and -adding detail as we go. To that end, here are some quick definitions of -some terms we will be working with. - - - A kobject is an object of type struct kobject. Kobjects have a name - and a reference count. A kobject also has a parent pointer (allowing - objects to be arranged into hierarchies), a specific type, and, - usually, a representation in the sysfs virtual filesystem. - - Kobjects are generally not interesting on their own; instead, they are - usually embedded within some other structure which contains the stuff - the code is really interested in. - - No structure should **EVER** have more than one kobject embedded within it. - If it does, the reference counting for the object is sure to be messed - up and incorrect, and your code will be buggy. So do not do this. - - - A ktype is the type of object that embeds a kobject. Every structure - that embeds a kobject needs a corresponding ktype. The ktype controls - what happens to the kobject when it is created and destroyed. - - - A kset is a group of kobjects. These kobjects can be of the same ktype - or belong to different ktypes. The kset is the basic container type for - collections of kobjects. Ksets contain their own kobjects, but you can - safely ignore that implementation detail as the kset core code handles - this kobject automatically. - - When you see a sysfs directory full of other directories, generally each - of those directories corresponds to a kobject in the same kset. - -We'll look at how to create and manipulate all of these types. A bottom-up -approach will be taken, so we'll go back to kobjects. - - -Embedding kobjects -================== - -It is rare for kernel code to create a standalone kobject, with one major -exception explained below. Instead, kobjects are used to control access to -a larger, domain-specific object. To this end, kobjects will be found -embedded in other structures. If you are used to thinking of things in -object-oriented terms, kobjects can be seen as a top-level, abstract class -from which other classes are derived. A kobject implements a set of -capabilities which are not particularly useful by themselves, but are -nice to have in other objects. The C language does not allow for the -direct expression of inheritance, so other techniques - such as structure -embedding - must be used. - -(As an aside, for those familiar with the kernel linked list implementation, -this is analogous as to how "list_head" structs are rarely useful on -their own, but are invariably found embedded in the larger objects of -interest.) - -So, for example, the UIO code in ``drivers/uio/uio.c`` has a structure that -defines the memory region associated with a uio device:: - - struct uio_map { - struct kobject kobj; - struct uio_mem *mem; - }; - -If you have a struct uio_map structure, finding its embedded kobject is -just a matter of using the kobj member. Code that works with kobjects will -often have the opposite problem, however: given a struct kobject pointer, -what is the pointer to the containing structure? You must avoid tricks -(such as assuming that the kobject is at the beginning of the structure) -and, instead, use the container_of() macro, found in ````:: - - container_of(pointer, type, member) - -where: - - * ``pointer`` is the pointer to the embedded kobject, - * ``type`` is the type of the containing structure, and - * ``member`` is the name of the structure field to which ``pointer`` points. - -The return value from container_of() is a pointer to the corresponding -container type. So, for example, a pointer ``kp`` to a struct kobject -embedded **within** a struct uio_map could be converted to a pointer to the -**containing** uio_map structure with:: - - struct uio_map *u_map = container_of(kp, struct uio_map, kobj); - -For convenience, programmers often define a simple macro for **back-casting** -kobject pointers to the containing type. Exactly this happens in the -earlier ``drivers/uio/uio.c``, as you can see here:: - - struct uio_map { - struct kobject kobj; - struct uio_mem *mem; - }; - - #define to_map(map) container_of(map, struct uio_map, kobj) - -where the macro argument "map" is a pointer to the struct kobject in -question. That macro is subsequently invoked with:: - - struct uio_map *map = to_map(kobj); - - -Initialization of kobjects -========================== - -Code which creates a kobject must, of course, initialize that object. Some -of the internal fields are setup with a (mandatory) call to kobject_init():: - - void kobject_init(struct kobject *kobj, struct kobj_type *ktype); - -The ktype is required for a kobject to be created properly, as every kobject -must have an associated kobj_type. After calling kobject_init(), to -register the kobject with sysfs, the function kobject_add() must be called:: - - int kobject_add(struct kobject *kobj, struct kobject *parent, - const char *fmt, ...); - -This sets up the parent of the kobject and the name for the kobject -properly. If the kobject is to be associated with a specific kset, -kobj->kset must be assigned before calling kobject_add(). If a kset is -associated with a kobject, then the parent for the kobject can be set to -NULL in the call to kobject_add() and then the kobject's parent will be the -kset itself. - -As the name of the kobject is set when it is added to the kernel, the name -of the kobject should never be manipulated directly. If you must change -the name of the kobject, call kobject_rename():: - - int kobject_rename(struct kobject *kobj, const char *new_name); - -kobject_rename does not perform any locking or have a solid notion of -what names are valid so the caller must provide their own sanity checking -and serialization. - -There is a function called kobject_set_name() but that is legacy cruft and -is being removed. If your code needs to call this function, it is -incorrect and needs to be fixed. - -To properly access the name of the kobject, use the function -kobject_name():: - - const char *kobject_name(const struct kobject * kobj); - -There is a helper function to both initialize and add the kobject to the -kernel at the same time, called surprisingly enough kobject_init_and_add():: - - int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, - struct kobject *parent, const char *fmt, ...); - -The arguments are the same as the individual kobject_init() and -kobject_add() functions described above. - - -Uevents -======= - -After a kobject has been registered with the kobject core, you need to -announce to the world that it has been created. This can be done with a -call to kobject_uevent():: - - int kobject_uevent(struct kobject *kobj, enum kobject_action action); - -Use the **KOBJ_ADD** action for when the kobject is first added to the kernel. -This should be done only after any attributes or children of the kobject -have been initialized properly, as userspace will instantly start to look -for them when this call happens. - -When the kobject is removed from the kernel (details on how to do that are -below), the uevent for **KOBJ_REMOVE** will be automatically created by the -kobject core, so the caller does not have to worry about doing that by -hand. - - -Reference counts -================ - -One of the key functions of a kobject is to serve as a reference counter -for the object in which it is embedded. As long as references to the object -exist, the object (and the code which supports it) must continue to exist. -The low-level functions for manipulating a kobject's reference counts are:: - - struct kobject *kobject_get(struct kobject *kobj); - void kobject_put(struct kobject *kobj); - -A successful call to kobject_get() will increment the kobject's reference -counter and return the pointer to the kobject. - -When a reference is released, the call to kobject_put() will decrement the -reference count and, possibly, free the object. Note that kobject_init() -sets the reference count to one, so the code which sets up the kobject will -need to do a kobject_put() eventually to release that reference. - -Because kobjects are dynamic, they must not be declared statically or on -the stack, but instead, always allocated dynamically. Future versions of -the kernel will contain a run-time check for kobjects that are created -statically and will warn the developer of this improper usage. - -If all that you want to use a kobject for is to provide a reference counter -for your structure, please use the struct kref instead; a kobject would be -overkill. For more information on how to use struct kref, please see the -file Documentation/kref.txt in the Linux kernel source tree. - - -Creating "simple" kobjects -========================== - -Sometimes all that a developer wants is a way to create a simple directory -in the sysfs hierarchy, and not have to mess with the whole complication of -ksets, show and store functions, and other details. This is the one -exception where a single kobject should be created. To create such an -entry, use the function:: - - struct kobject *kobject_create_and_add(char *name, struct kobject *parent); - -This function will create a kobject and place it in sysfs in the location -underneath the specified parent kobject. To create simple attributes -associated with this kobject, use:: - - int sysfs_create_file(struct kobject *kobj, struct attribute *attr); - -or:: - - int sysfs_create_group(struct kobject *kobj, struct attribute_group *grp); - -Both types of attributes used here, with a kobject that has been created -with the kobject_create_and_add(), can be of type kobj_attribute, so no -special custom attribute is needed to be created. - -See the example module, ``samples/kobject/kobject-example.c`` for an -implementation of a simple kobject and attributes. - - - -ktypes and release methods -========================== - -One important thing still missing from the discussion is what happens to a -kobject when its reference count reaches zero. The code which created the -kobject generally does not know when that will happen; if it did, there -would be little point in using a kobject in the first place. Even -predictable object lifecycles become more complicated when sysfs is brought -in as other portions of the kernel can get a reference on any kobject that -is registered in the system. - -The end result is that a structure protected by a kobject cannot be freed -before its reference count goes to zero. The reference count is not under -the direct control of the code which created the kobject. So that code must -be notified asynchronously whenever the last reference to one of its -kobjects goes away. - -Once you registered your kobject via kobject_add(), you must never use -kfree() to free it directly. The only safe way is to use kobject_put(). It -is good practice to always use kobject_put() after kobject_init() to avoid -errors creeping in. - -This notification is done through a kobject's release() method. Usually -such a method has a form like:: - - void my_object_release(struct kobject *kobj) - { - struct my_object *mine = container_of(kobj, struct my_object, kobj); - - /* Perform any additional cleanup on this object, then... */ - kfree(mine); - } - -One important point cannot be overstated: every kobject must have a -release() method, and the kobject must persist (in a consistent state) -until that method is called. If these constraints are not met, the code is -flawed. Note that the kernel will warn you if you forget to provide a -release() method. Do not try to get rid of this warning by providing an -"empty" release function. - -If all your cleanup function needs to do is call kfree(), then you must -create a wrapper function which uses container_of() to upcast to the correct -type (as shown in the example above) and then calls kfree() on the overall -structure. - -Note, the name of the kobject is available in the release function, but it -must NOT be changed within this callback. Otherwise there will be a memory -leak in the kobject core, which makes people unhappy. - -Interestingly, the release() method is not stored in the kobject itself; -instead, it is associated with the ktype. So let us introduce struct -kobj_type:: - - struct kobj_type { - void (*release)(struct kobject *kobj); - const struct sysfs_ops *sysfs_ops; - struct attribute **default_attrs; - const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); - const void *(*namespace)(struct kobject *kobj); - }; - -This structure is used to describe a particular type of kobject (or, more -correctly, of containing object). Every kobject needs to have an associated -kobj_type structure; a pointer to that structure must be specified when you -call kobject_init() or kobject_init_and_add(). - -The release field in struct kobj_type is, of course, a pointer to the -release() method for this type of kobject. The other two fields (sysfs_ops -and default_attrs) control how objects of this type are represented in -sysfs; they are beyond the scope of this document. - -The default_attrs pointer is a list of default attributes that will be -automatically created for any kobject that is registered with this ktype. - - -ksets -===== - -A kset is merely a collection of kobjects that want to be associated with -each other. There is no restriction that they be of the same ktype, but be -very careful if they are not. - -A kset serves these functions: - - - It serves as a bag containing a group of objects. A kset can be used by - the kernel to track "all block devices" or "all PCI device drivers." - - - A kset is also a subdirectory in sysfs, where the associated kobjects - with the kset can show up. Every kset contains a kobject which can be - set up to be the parent of other kobjects; the top-level directories of - the sysfs hierarchy are constructed in this way. - - - Ksets can support the "hotplugging" of kobjects and influence how - uevent events are reported to user space. - -In object-oriented terms, "kset" is the top-level container class; ksets -contain their own kobject, but that kobject is managed by the kset code and -should not be manipulated by any other user. - -A kset keeps its children in a standard kernel linked list. Kobjects point -back to their containing kset via their kset field. In almost all cases, -the kobjects belonging to a kset have that kset (or, strictly, its embedded -kobject) in their parent. - -As a kset contains a kobject within it, it should always be dynamically -created and never declared statically or on the stack. To create a new -kset use:: - - struct kset *kset_create_and_add(const char *name, - struct kset_uevent_ops *u, - struct kobject *parent); - -When you are finished with the kset, call:: - - void kset_unregister(struct kset *kset); - -to destroy it. This removes the kset from sysfs and decrements its reference -count. When the reference count goes to zero, the kset will be released. -Because other references to the kset may still exist, the release may happen -after kset_unregister() returns. - -An example of using a kset can be seen in the -``samples/kobject/kset-example.c`` file in the kernel tree. - -If a kset wishes to control the uevent operations of the kobjects -associated with it, it can use the struct kset_uevent_ops to handle it:: - - struct kset_uevent_ops { - int (*filter)(struct kset *kset, struct kobject *kobj); - const char *(*name)(struct kset *kset, struct kobject *kobj); - int (*uevent)(struct kset *kset, struct kobject *kobj, - struct kobj_uevent_env *env); - }; - - -The filter function allows a kset to prevent a uevent from being emitted to -userspace for a specific kobject. If the function returns 0, the uevent -will not be emitted. - -The name function will be called to override the default name of the kset -that the uevent sends to userspace. By default, the name will be the same -as the kset itself, but this function, if present, can override that name. - -The uevent function will be called when the uevent is about to be sent to -userspace to allow more environment variables to be added to the uevent. - -One might ask how, exactly, a kobject is added to a kset, given that no -functions which perform that function have been presented. The answer is -that this task is handled by kobject_add(). When a kobject is passed to -kobject_add(), its kset member should point to the kset to which the -kobject will belong. kobject_add() will handle the rest. - -If the kobject belonging to a kset has no parent kobject set, it will be -added to the kset's directory. Not all members of a kset do necessarily -live in the kset directory. If an explicit parent kobject is assigned -before the kobject is added, the kobject is registered with the kset, but -added below the parent kobject. - - -Kobject removal -=============== - -After a kobject has been registered with the kobject core successfully, it -must be cleaned up when the code is finished with it. To do that, call -kobject_put(). By doing this, the kobject core will automatically clean up -all of the memory allocated by this kobject. If a ``KOBJ_ADD`` uevent has been -sent for the object, a corresponding ``KOBJ_REMOVE`` uevent will be sent, and -any other sysfs housekeeping will be handled for the caller properly. - -If you need to do a two-stage delete of the kobject (say you are not -allowed to sleep when you need to destroy the object), then call -kobject_del() which will unregister the kobject from sysfs. This makes the -kobject "invisible", but it is not cleaned up, and the reference count of -the object is still the same. At a later time call kobject_put() to finish -the cleanup of the memory associated with the kobject. - -kobject_del() can be used to drop the reference to the parent object, if -circular references are constructed. It is valid in some cases, that a -parent objects references a child. Circular references _must_ be broken -with an explicit call to kobject_del(), so that a release functions will be -called, and the objects in the former circle release each other. - - -Example code to copy from -========================= - -For a more complete example of using ksets and kobjects properly, see the -example programs ``samples/kobject/{kobject-example.c,kset-example.c}``, -which will be built as loadable modules if you select ``CONFIG_SAMPLE_KOBJECT``. -- cgit v1.2.3 From ae5977765acb25c1eafb348f81a6597cb7a88eba Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 25 Feb 2020 20:40:52 +0800 Subject: Documentation: kthread: Fix WQ_SYSFS workqueues path name The set of WQ_SYSFS workqueues should be displayed using "ls /sys/devices/virtual/workqueue", add the missing '/'. Signed-off-by: Zenghui Yu Link: https://lore.kernel.org/r/20200225124052.1506-1-yuzenghui@huawei.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-per-CPU-kthreads.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst index baeeba8762ae..21818aca4708 100644 --- a/Documentation/admin-guide/kernel-per-CPU-kthreads.rst +++ b/Documentation/admin-guide/kernel-per-CPU-kthreads.rst @@ -234,7 +234,7 @@ To reduce its OS jitter, do any of the following: Such a workqueue can be confined to a given subset of the CPUs using the ``/sys/devices/virtual/workqueue/*/cpumask`` sysfs files. The set of WQ_SYSFS workqueues can be displayed using - "ls sys/devices/virtual/workqueue". That said, the workqueues + "ls /sys/devices/virtual/workqueue". That said, the workqueues maintainer would like to caution people against indiscriminately sprinkling WQ_SYSFS across all the workqueues. The reason for caution is that it is easy to add WQ_SYSFS, but because sysfs is -- cgit v1.2.3 From 3eb30c51a6dda26d0c5b8824b7c0515502f1c161 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 12 Feb 2020 19:13:32 +0100 Subject: Documentation: nfsroot.rst: Fix references to nfsroot.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When converting and moving nfsroot.txt to nfsroot.rst the references to the old text file was not updated to match the change, fix this. Fixes: f9a9349846f92b2d ("Documentation: nfsroot.txt: convert to ReST") Signed-off-by: Niklas Söderlund Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200212181332.520545-1-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 8 ++++---- Documentation/filesystems/cifs/cifsroot.txt | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3e3fd0d19e53..4220477079bd 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1885,7 +1885,7 @@ No delay ip= [IP_PNP] - See Documentation/filesystems/nfs/nfsroot.txt. + See Documentation/admin-guide/nfs/nfsroot.rst. ipcmni_extend [KNL] Extend the maximum number of unique System V IPC identifiers from 32,768 to 16,777,216. @@ -2855,13 +2855,13 @@ Default value is 0. nfsaddrs= [NFS] Deprecated. Use ip= instead. - See Documentation/filesystems/nfs/nfsroot.txt. + See Documentation/admin-guide/nfs/nfsroot.rst. nfsroot= [NFS] nfs root filesystem for disk-less boxes. - See Documentation/filesystems/nfs/nfsroot.txt. + See Documentation/admin-guide/nfs/nfsroot.rst. nfsrootdebug [NFS] enable nfsroot debugging messages. - See Documentation/filesystems/nfs/nfsroot.txt. + See Documentation/admin-guide/nfs/nfsroot.rst. nfs.callback_nr_threads= [NFSv4] set the total number of threads that the diff --git a/Documentation/filesystems/cifs/cifsroot.txt b/Documentation/filesystems/cifs/cifsroot.txt index 0fa1a2c36a40..947b7ec6ce9e 100644 --- a/Documentation/filesystems/cifs/cifsroot.txt +++ b/Documentation/filesystems/cifs/cifsroot.txt @@ -13,7 +13,7 @@ network by utilizing SMB or CIFS protocol. In order to mount, the network stack will also need to be set up by using 'ip=' config option. For more details, see -Documentation/filesystems/nfs/nfsroot.txt. +Documentation/admin-guide/nfs/nfsroot.rst. A CIFS root mount currently requires the use of SMB1+UNIX Extensions which is only supported by the Samba server. SMB1 is the older -- cgit v1.2.3 From 07d241fd66ba99111d43a0a4c4abeeb972468d1d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:47 +0100 Subject: docs: filesystems: convert 9p.txt to ReST - Add a SPDX header; - Add a document title; - Adjust section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/96a060b7b5c0c3838ab1751addfe4d6d3bc37bd6.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/9p.rst | 185 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/9p.txt | 161 ------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 186 insertions(+), 161 deletions(-) create mode 100644 Documentation/filesystems/9p.rst delete mode 100644 Documentation/filesystems/9p.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst new file mode 100644 index 000000000000..f054d1c45e86 --- /dev/null +++ b/Documentation/filesystems/9p.rst @@ -0,0 +1,185 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================= +v9fs: Plan 9 Resource Sharing for Linux +======================================= + +About +===== + +v9fs is a Unix implementation of the Plan 9 9p remote filesystem protocol. + +This software was originally developed by Ron Minnich +and Maya Gokhale. Additional development by Greg Watson + and most recently Eric Van Hensbergen +, Latchesar Ionkov and Russ Cox +. + +The best detailed explanation of the Linux implementation and applications of +the 9p client is available in the form of a USENIX paper: + + http://www.usenix.org/events/usenix05/tech/freenix/hensbergen.html + +Other applications are described in the following papers: + + * XCPU & Clustering + http://xcpu.org/papers/xcpu-talk.pdf + * KVMFS: control file system for KVM + http://xcpu.org/papers/kvmfs.pdf + * CellFS: A New Programming Model for the Cell BE + http://xcpu.org/papers/cellfs-talk.pdf + * PROSE I/O: Using 9p to enable Application Partitions + http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf + * VirtFS: A Virtualization Aware File System pass-through + http://goo.gl/3WPDg + +Usage +===== + +For remote file server:: + + mount -t 9p 10.10.1.2 /mnt/9 + +For Plan 9 From User Space applications (http://swtch.com/plan9):: + + mount -t 9p `namespace`/acme /mnt/9 -o trans=unix,uname=$USER + +For server running on QEMU host with virtio transport:: + + mount -t 9p -o trans=virtio /mnt/9 + +where mount_tag is the tag associated by the server to each of the exported +mount points. Each 9P export is seen by the client as a virtio device with an +associated "mount_tag" property. Available mount tags can be +seen by reading /sys/bus/virtio/drivers/9pnet_virtio/virtio/mount_tag files. + +Options +======= + + ============= =============================================================== + trans=name select an alternative transport. Valid options are + currently: + + ======== ============================================ + unix specifying a named pipe mount point + tcp specifying a normal TCP/IP connection + fd used passed file descriptors for connection + (see rfdno and wfdno) + virtio connect to the next virtio channel available + (from QEMU with trans_virtio module) + rdma connect to a specified RDMA channel + ======== ============================================ + + uname=name user name to attempt mount as on the remote server. The + server may override or ignore this value. Certain user + names may require authentication. + + aname=name aname specifies the file tree to access when the server is + offering several exported file systems. + + cache=mode specifies a caching policy. By default, no caches are used. + + none + default no cache policy, metadata and data + alike are synchronous. + loose + no attempts are made at consistency, + intended for exclusive, read-only mounts + fscache + use FS-Cache for a persistent, read-only + cache backend. + mmap + minimal cache that is only used for read-write + mmap. Northing else is cached, like cache=none + + debug=n specifies debug level. The debug level is a bitmask. + + ===== ================================ + 0x01 display verbose error messages + 0x02 developer debug (DEBUG_CURRENT) + 0x04 display 9p trace + 0x08 display VFS trace + 0x10 display Marshalling debug + 0x20 display RPC debug + 0x40 display transport debug + 0x80 display allocation debug + 0x100 display protocol message debug + 0x200 display Fid debug + 0x400 display packet debug + 0x800 display fscache tracing debug + ===== ================================ + + rfdno=n the file descriptor for reading with trans=fd + + wfdno=n the file descriptor for writing with trans=fd + + msize=n the number of bytes to use for 9p packet payload + + port=n port to connect to on the remote server + + noextend force legacy mode (no 9p2000.u or 9p2000.L semantics) + + version=name Select 9P protocol version. Valid options are: + + ======== ============================== + 9p2000 Legacy mode (same as noextend) + 9p2000.u Use 9P2000.u protocol + 9p2000.L Use 9P2000.L protocol + ======== ============================== + + dfltuid attempt to mount as a particular uid + + dfltgid attempt to mount with a particular gid + + afid security channel - used by Plan 9 authentication protocols + + nodevmap do not map special files - represent them as normal files. + This can be used to share devices/named pipes/sockets between + hosts. This functionality will be expanded in later versions. + + access there are four access modes. + user + if a user tries to access a file on v9fs + filesystem for the first time, v9fs sends an + attach command (Tattach) for that user. + This is the default mode. + + allows only user with uid= to access + the files on the mounted filesystem + any + v9fs does single attach and performs all + operations as one user + clien + ACL based access check on the 9p client + side for access validation + + cachetag cache tag to use the specified persistent cache. + cache tags for existing cache sessions can be listed at + /sys/fs/9p/caches. (applies only to cache=fscache) + ============= =============================================================== + +Resources +========= + +Protocol specifications are maintained on github: +http://ericvh.github.com/9p-rfc/ + +9p client and server implementations are listed on +http://9p.cat-v.org/implementations + +A 9p2000.L server is being developed by LLNL and can be found +at http://code.google.com/p/diod/ + +There are user and developer mailing lists available through the v9fs project +on sourceforge (http://sourceforge.net/projects/v9fs). + +News and other information is maintained on a Wiki. +(http://sf.net/apps/mediawiki/v9fs/index.php). + +Bug reports are best issued via the mailing list. + +For more information on the Plan 9 Operating System check out +http://plan9.bell-labs.com/plan9 + +For information on Plan 9 from User Space (Plan 9 applications and libraries +ported to Linux/BSD/OSX/etc) check out http://swtch.com/plan9 diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt deleted file mode 100644 index fec7144e817c..000000000000 --- a/Documentation/filesystems/9p.txt +++ /dev/null @@ -1,161 +0,0 @@ - v9fs: Plan 9 Resource Sharing for Linux - ======================================= - -ABOUT -===== - -v9fs is a Unix implementation of the Plan 9 9p remote filesystem protocol. - -This software was originally developed by Ron Minnich -and Maya Gokhale. Additional development by Greg Watson - and most recently Eric Van Hensbergen -, Latchesar Ionkov and Russ Cox -. - -The best detailed explanation of the Linux implementation and applications of -the 9p client is available in the form of a USENIX paper: - http://www.usenix.org/events/usenix05/tech/freenix/hensbergen.html - -Other applications are described in the following papers: - * XCPU & Clustering - http://xcpu.org/papers/xcpu-talk.pdf - * KVMFS: control file system for KVM - http://xcpu.org/papers/kvmfs.pdf - * CellFS: A New Programming Model for the Cell BE - http://xcpu.org/papers/cellfs-talk.pdf - * PROSE I/O: Using 9p to enable Application Partitions - http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf - * VirtFS: A Virtualization Aware File System pass-through - http://goo.gl/3WPDg - -USAGE -===== - -For remote file server: - - mount -t 9p 10.10.1.2 /mnt/9 - -For Plan 9 From User Space applications (http://swtch.com/plan9) - - mount -t 9p `namespace`/acme /mnt/9 -o trans=unix,uname=$USER - -For server running on QEMU host with virtio transport: - - mount -t 9p -o trans=virtio /mnt/9 - -where mount_tag is the tag associated by the server to each of the exported -mount points. Each 9P export is seen by the client as a virtio device with an -associated "mount_tag" property. Available mount tags can be -seen by reading /sys/bus/virtio/drivers/9pnet_virtio/virtio/mount_tag files. - -OPTIONS -======= - - trans=name select an alternative transport. Valid options are - currently: - unix - specifying a named pipe mount point - tcp - specifying a normal TCP/IP connection - fd - used passed file descriptors for connection - (see rfdno and wfdno) - virtio - connect to the next virtio channel available - (from QEMU with trans_virtio module) - rdma - connect to a specified RDMA channel - - uname=name user name to attempt mount as on the remote server. The - server may override or ignore this value. Certain user - names may require authentication. - - aname=name aname specifies the file tree to access when the server is - offering several exported file systems. - - cache=mode specifies a caching policy. By default, no caches are used. - none = default no cache policy, metadata and data - alike are synchronous. - loose = no attempts are made at consistency, - intended for exclusive, read-only mounts - fscache = use FS-Cache for a persistent, read-only - cache backend. - mmap = minimal cache that is only used for read-write - mmap. Northing else is cached, like cache=none - - debug=n specifies debug level. The debug level is a bitmask. - 0x01 = display verbose error messages - 0x02 = developer debug (DEBUG_CURRENT) - 0x04 = display 9p trace - 0x08 = display VFS trace - 0x10 = display Marshalling debug - 0x20 = display RPC debug - 0x40 = display transport debug - 0x80 = display allocation debug - 0x100 = display protocol message debug - 0x200 = display Fid debug - 0x400 = display packet debug - 0x800 = display fscache tracing debug - - rfdno=n the file descriptor for reading with trans=fd - - wfdno=n the file descriptor for writing with trans=fd - - msize=n the number of bytes to use for 9p packet payload - - port=n port to connect to on the remote server - - noextend force legacy mode (no 9p2000.u or 9p2000.L semantics) - - version=name Select 9P protocol version. Valid options are: - 9p2000 - Legacy mode (same as noextend) - 9p2000.u - Use 9P2000.u protocol - 9p2000.L - Use 9P2000.L protocol - - dfltuid attempt to mount as a particular uid - - dfltgid attempt to mount with a particular gid - - afid security channel - used by Plan 9 authentication protocols - - nodevmap do not map special files - represent them as normal files. - This can be used to share devices/named pipes/sockets between - hosts. This functionality will be expanded in later versions. - - access there are four access modes. - user = if a user tries to access a file on v9fs - filesystem for the first time, v9fs sends an - attach command (Tattach) for that user. - This is the default mode. - = allows only user with uid= to access - the files on the mounted filesystem - any = v9fs does single attach and performs all - operations as one user - client = ACL based access check on the 9p client - side for access validation - - cachetag cache tag to use the specified persistent cache. - cache tags for existing cache sessions can be listed at - /sys/fs/9p/caches. (applies only to cache=fscache) - -RESOURCES -========= - -Protocol specifications are maintained on github: -http://ericvh.github.com/9p-rfc/ - -9p client and server implementations are listed on -http://9p.cat-v.org/implementations - -A 9p2000.L server is being developed by LLNL and can be found -at http://code.google.com/p/diod/ - -There are user and developer mailing lists available through the v9fs project -on sourceforge (http://sourceforge.net/projects/v9fs). - -News and other information is maintained on a Wiki. -(http://sf.net/apps/mediawiki/v9fs/index.php). - -Bug reports are best issued via the mailing list. - -For more information on the Plan 9 Operating System check out -http://plan9.bell-labs.com/plan9 - -For information on Plan 9 from User Space (Plan 9 applications and libraries -ported to Linux/BSD/OSX/etc) check out http://swtch.com/plan9 - diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 45d791905e91..a9330c3f8c2e 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -46,6 +46,7 @@ Documentation for filesystem implementations. .. toctree:: :maxdepth: 2 + 9p autofs fuse overlayfs -- cgit v1.2.3 From 348739003d4f7e777ef935a44a91e7494f8ab786 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:48 +0100 Subject: docs: filesystems: convert adfs.txt to ReST - Add a SPDX header; - Add a document title; - Adjust section titles; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/15ee92f03ec917e5d26bd7b863565dec88c843f6.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/adfs.rst | 108 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/adfs.txt | 99 --------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 109 insertions(+), 99 deletions(-) create mode 100644 Documentation/filesystems/adfs.rst delete mode 100644 Documentation/filesystems/adfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/adfs.rst b/Documentation/filesystems/adfs.rst new file mode 100644 index 000000000000..5b22cae38e5e --- /dev/null +++ b/Documentation/filesystems/adfs.rst @@ -0,0 +1,108 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================== +Acorn Disc Filing System - ADFS +=============================== + +Filesystems supported by ADFS +----------------------------- + +The ADFS module supports the following Filecore formats which have: + +- new maps +- new directories or big directories + +In terms of the named formats, this means we support: + +- E and E+, with or without boot block +- F and F+ + +We fully support reading files from these filesystems, and writing to +existing files within their existing allocation. Essentially, we do +not support changing any of the filesystem metadata. + +This is intended to support loopback mounted Linux native filesystems +on a RISC OS Filecore filesystem, but will allow the data within files +to be changed. + +If write support (ADFS_FS_RW) is configured, we allow rudimentary +directory updates, specifically updating the access mode and timestamp. + +Mount options for ADFS +---------------------- + + ============ ====================================================== + uid=nnn All files in the partition will be owned by + user id nnn. Default 0 (root). + gid=nnn All files in the partition will be in group + nnn. Default 0 (root). + ownmask=nnn The permission mask for ADFS 'owner' permissions + will be nnn. Default 0700. + othmask=nnn The permission mask for ADFS 'other' permissions + will be nnn. Default 0077. + ftsuffix=n When ftsuffix=0, no file type suffix will be applied. + When ftsuffix=1, a hexadecimal suffix corresponding to + the RISC OS file type will be added. Default 0. + ============ ====================================================== + +Mapping of ADFS permissions to Linux permissions +------------------------------------------------ + + ADFS permissions consist of the following: + + - Owner read + - Owner write + - Other read + - Other write + + (In older versions, an 'execute' permission did exist, but this + does not hold the same meaning as the Linux 'execute' permission + and is now obsolete). + + The mapping is performed as follows:: + + Owner read -> -r--r--r-- + Owner write -> --w--w---w + Owner read and filetype UnixExec -> ---x--x--x + These are then masked by ownmask, eg 700 -> -rwx------ + Possible owner mode permissions -> -rwx------ + + Other read -> -r--r--r-- + Other write -> --w--w--w- + Other read and filetype UnixExec -> ---x--x--x + These are then masked by othmask, eg 077 -> ----rwxrwx + Possible other mode permissions -> ----rwxrwx + + Hence, with the default masks, if a file is owner read/write, and + not a UnixExec filetype, then the permissions will be:: + + -rw------- + + However, if the masks were ownmask=0770,othmask=0007, then this would + be modified to:: + + -rw-rw---- + + There is no restriction on what you can do with these masks. You may + wish that either read bits give read access to the file for all, but + keep the default write protection (ownmask=0755,othmask=0577):: + + -rw-r--r-- + + You can therefore tailor the permission translation to whatever you + desire the permissions should be under Linux. + +RISC OS file type suffix +------------------------ + + RISC OS file types are stored in bits 19..8 of the file load address. + + To enable non-RISC OS systems to be used to store files without losing + file type information, a file naming convention was devised (initially + for use with NFS) such that a hexadecimal suffix of the form ,xyz + denoted the file type: e.g. BasicFile,ffb is a BASIC (0xffb) file. This + naming convention is now also used by RISC OS emulators such as RPCEmu. + + Mounting an ADFS disc with option ftsuffix=1 will cause appropriate file + type suffixes to be appended to file names read from a directory. If the + ftsuffix option is zero or omitted, no file type suffixes will be added. diff --git a/Documentation/filesystems/adfs.txt b/Documentation/filesystems/adfs.txt deleted file mode 100644 index 0baa8e8c1fc1..000000000000 --- a/Documentation/filesystems/adfs.txt +++ /dev/null @@ -1,99 +0,0 @@ -Filesystems supported by ADFS ------------------------------ - -The ADFS module supports the following Filecore formats which have: - -- new maps -- new directories or big directories - -In terms of the named formats, this means we support: - -- E and E+, with or without boot block -- F and F+ - -We fully support reading files from these filesystems, and writing to -existing files within their existing allocation. Essentially, we do -not support changing any of the filesystem metadata. - -This is intended to support loopback mounted Linux native filesystems -on a RISC OS Filecore filesystem, but will allow the data within files -to be changed. - -If write support (ADFS_FS_RW) is configured, we allow rudimentary -directory updates, specifically updating the access mode and timestamp. - -Mount options for ADFS ----------------------- - - uid=nnn All files in the partition will be owned by - user id nnn. Default 0 (root). - gid=nnn All files in the partition will be in group - nnn. Default 0 (root). - ownmask=nnn The permission mask for ADFS 'owner' permissions - will be nnn. Default 0700. - othmask=nnn The permission mask for ADFS 'other' permissions - will be nnn. Default 0077. - ftsuffix=n When ftsuffix=0, no file type suffix will be applied. - When ftsuffix=1, a hexadecimal suffix corresponding to - the RISC OS file type will be added. Default 0. - -Mapping of ADFS permissions to Linux permissions ------------------------------------------------- - - ADFS permissions consist of the following: - - Owner read - Owner write - Other read - Other write - - (In older versions, an 'execute' permission did exist, but this - does not hold the same meaning as the Linux 'execute' permission - and is now obsolete). - - The mapping is performed as follows: - - Owner read -> -r--r--r-- - Owner write -> --w--w---w - Owner read and filetype UnixExec -> ---x--x--x - These are then masked by ownmask, eg 700 -> -rwx------ - Possible owner mode permissions -> -rwx------ - - Other read -> -r--r--r-- - Other write -> --w--w--w- - Other read and filetype UnixExec -> ---x--x--x - These are then masked by othmask, eg 077 -> ----rwxrwx - Possible other mode permissions -> ----rwxrwx - - Hence, with the default masks, if a file is owner read/write, and - not a UnixExec filetype, then the permissions will be: - - -rw------- - - However, if the masks were ownmask=0770,othmask=0007, then this would - be modified to: - -rw-rw---- - - There is no restriction on what you can do with these masks. You may - wish that either read bits give read access to the file for all, but - keep the default write protection (ownmask=0755,othmask=0577): - - -rw-r--r-- - - You can therefore tailor the permission translation to whatever you - desire the permissions should be under Linux. - -RISC OS file type suffix ------------------------- - - RISC OS file types are stored in bits 19..8 of the file load address. - - To enable non-RISC OS systems to be used to store files without losing - file type information, a file naming convention was devised (initially - for use with NFS) such that a hexadecimal suffix of the form ,xyz - denoted the file type: e.g. BasicFile,ffb is a BASIC (0xffb) file. This - naming convention is now also used by RISC OS emulators such as RPCEmu. - - Mounting an ADFS disc with option ftsuffix=1 will cause appropriate file - type suffixes to be appended to file names read from a directory. If the - ftsuffix option is zero or omitted, no file type suffixes will be added. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index a9330c3f8c2e..14dc89c94822 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -47,6 +47,7 @@ Documentation for filesystem implementations. :maxdepth: 2 9p + adfs autofs fuse overlayfs -- cgit v1.2.3 From 7627216830d808572fff8225964e9209249ba196 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:49 +0100 Subject: docs: filesystems: convert affs.txt to ReST - Add a SPDX header; - Adjust document title; - Add table markups; - Mark literal blocks as such; - Some whitespace fixes and new line breaks; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: David Sterba Link: https://lore.kernel.org/r/b44c56befe0e28cbc0eb1b3e281ad7d99737ff16.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/affs.rst | 246 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/affs.txt | 222 -------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 247 insertions(+), 222 deletions(-) create mode 100644 Documentation/filesystems/affs.rst delete mode 100644 Documentation/filesystems/affs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/affs.rst b/Documentation/filesystems/affs.rst new file mode 100644 index 000000000000..7f1a40dce6d3 --- /dev/null +++ b/Documentation/filesystems/affs.rst @@ -0,0 +1,246 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================= +Overview of Amiga Filesystems +============================= + +Not all varieties of the Amiga filesystems are supported for reading and +writing. The Amiga currently knows six different filesystems: + +============== =============================================================== +DOS\0 The old or original filesystem, not really suited for + hard disks and normally not used on them, either. + Supported read/write. + +DOS\1 The original Fast File System. Supported read/write. + +DOS\2 The old "international" filesystem. International means that + a bug has been fixed so that accented ("international") letters + in file names are case-insensitive, as they ought to be. + Supported read/write. + +DOS\3 The "international" Fast File System. Supported read/write. + +DOS\4 The original filesystem with directory cache. The directory + cache speeds up directory accesses on floppies considerably, + but slows down file creation/deletion. Doesn't make much + sense on hard disks. Supported read only. + +DOS\5 The Fast File System with directory cache. Supported read only. +============== =============================================================== + +All of the above filesystems allow block sizes from 512 to 32K bytes. +Supported block sizes are: 512, 1024, 2048 and 4096 bytes. Larger blocks +speed up almost everything at the expense of wasted disk space. The speed +gain above 4K seems not really worth the price, so you don't lose too +much here, either. + +The muFS (multi user File System) equivalents of the above file systems +are supported, too. + +Mount options for the AFFS +========================== + +protect + If this option is set, the protection bits cannot be altered. + +setuid[=uid] + This sets the owner of all files and directories in the file + system to uid or the uid of the current user, respectively. + +setgid[=gid] + Same as above, but for gid. + +mode=mode + Sets the mode flags to the given (octal) value, regardless + of the original permissions. Directories will get an x + permission if the corresponding r bit is set. + This is useful since most of the plain AmigaOS files + will map to 600. + +nofilenametruncate + The file system will return an error when filename exceeds + standard maximum filename length (30 characters). + +reserved=num + Sets the number of reserved blocks at the start of the + partition to num. You should never need this option. + Default is 2. + +root=block + Sets the block number of the root block. This should never + be necessary. + +bs=blksize + Sets the blocksize to blksize. Valid block sizes are 512, + 1024, 2048 and 4096. Like the root option, this should + never be necessary, as the affs can figure it out itself. + +quiet + The file system will not return an error for disallowed + mode changes. + +verbose + The volume name, file system type and block size will + be written to the syslog when the filesystem is mounted. + +mufs + The filesystem is really a muFS, also it doesn't + identify itself as one. This option is necessary if + the filesystem wasn't formatted as muFS, but is used + as one. + +prefix=path + Path will be prefixed to every absolute path name of + symbolic links on an AFFS partition. Default = "/". + (See below.) + +volume=name + When symbolic links with an absolute path are created + on an AFFS partition, name will be prepended as the + volume name. Default = "" (empty string). + (See below.) + +Handling of the Users/Groups and protection flags +================================================= + +Amiga -> Linux: + +The Amiga protection flags RWEDRWEDHSPARWED are handled as follows: + + - R maps to r for user, group and others. On directories, R implies x. + + - If both W and D are allowed, w will be set. + + - E maps to x. + + - H and P are always retained and ignored under Linux. + + - A is always reset when a file is written to. + +User id and group id will be used unless set[gu]id are given as mount +options. Since most of the Amiga file systems are single user systems +they will be owned by root. The root directory (the mount point) of the +Amiga filesystem will be owned by the user who actually mounts the +filesystem (the root directory doesn't have uid/gid fields). + +Linux -> Amiga: + +The Linux rwxrwxrwx file mode is handled as follows: + + - r permission will set R for user, group and others. + + - w permission will set W and D for user, group and others. + + - x permission of the user will set E for plain files. + + - All other flags (suid, sgid, ...) are ignored and will + not be retained. + +Newly created files and directories will get the user and group ID +of the current user and a mode according to the umask. + +Symbolic links +============== + +Although the Amiga and Linux file systems resemble each other, there +are some, not always subtle, differences. One of them becomes apparent +with symbolic links. While Linux has a file system with exactly one +root directory, the Amiga has a separate root directory for each +file system (for example, partition, floppy disk, ...). With the Amiga, +these entities are called "volumes". They have symbolic names which +can be used to access them. Thus, symbolic links can point to a +different volume. AFFS turns the volume name into a directory name +and prepends the prefix path (see prefix option) to it. + +Example: +You mount all your Amiga partitions under /amiga/ (where + is the name of the volume), and you give the option +"prefix=/amiga/" when mounting all your AFFS partitions. (They +might be "User", "WB" and "Graphics", the mount points /amiga/User, +/amiga/WB and /amiga/Graphics). A symbolic link referring to +"User:sc/include/dos/dos.h" will be followed to +"/amiga/User/sc/include/dos/dos.h". + +Examples +======== + +Command line:: + + mount Archive/Amiga/Workbench3.1.adf /mnt -t affs -o loop,verbose + mount /dev/sda3 /Amiga -t affs + +/etc/fstab entry:: + + /dev/sdb5 /amiga/Workbench affs noauto,user,exec,verbose 0 0 + +IMPORTANT NOTE +============== + +If you boot Windows 95 (don't know about 3.x, 98 and NT) while you +have an Amiga harddisk connected to your PC, it will overwrite +the bytes 0x00dc..0x00df of block 0 with garbage, thus invalidating +the Rigid Disk Block. Sheer luck has it that this is an unused +area of the RDB, so only the checksum doesn't match anymore. +Linux will ignore this garbage and recognize the RDB anyway, but +before you connect that drive to your Amiga again, you must +restore or repair your RDB. So please do make a backup copy of it +before booting Windows! + +If the damage is already done, the following should fix the RDB +(where is the device name). + +DO AT YOUR OWN RISK:: + + dd if=/dev/ of=rdb.tmp count=1 + cp rdb.tmp rdb.fixed + dd if=/dev/zero of=rdb.fixed bs=1 seek=220 count=4 + dd if=rdb.fixed of=/dev/ + +Bugs, Restrictions, Caveats +=========================== + +Quite a few things may not work as advertised. Not everything is +tested, though several hundred MB have been read and written using +this fs. For a most up-to-date list of bugs please consult +fs/affs/Changes. + +By default, filenames are truncated to 30 characters without warning. +'nofilenametruncate' mount option can change that behavior. + +Case is ignored by the affs in filename matching, but Linux shells +do care about the case. Example (with /wb being an affs mounted fs):: + + rm /wb/WRONGCASE + +will remove /mnt/wrongcase, but:: + + rm /wb/WR* + +will not since the names are matched by the shell. + +The block allocation is designed for hard disk partitions. If more +than 1 process writes to a (small) diskette, the blocks are allocated +in an ugly way (but the real AFFS doesn't do much better). This +is also true when space gets tight. + +You cannot execute programs on an OFS (Old File System), since the +program files cannot be memory mapped due to the 488 byte blocks. +For the same reason you cannot mount an image on such a filesystem +via the loopback device. + +The bitmap valid flag in the root block may not be accurate when the +system crashes while an affs partition is mounted. There's currently +no way to fix a garbled filesystem without an Amiga (disk validator) +or manually (who would do this?). Maybe later. + +If you mount affs partitions on system startup, you may want to tell +fsck that the fs should not be checked (place a '0' in the sixth field +of /etc/fstab). + +It's not possible to read floppy disks with a normal PC or workstation +due to an incompatibility with the Amiga floppy controller. + +If you are interested in an Amiga Emulator for Linux, look at + +http://web.archive.org/web/%2E/http://www.freiburg.linux.de/~uae/ diff --git a/Documentation/filesystems/affs.txt b/Documentation/filesystems/affs.txt deleted file mode 100644 index 71b63c2b9841..000000000000 --- a/Documentation/filesystems/affs.txt +++ /dev/null @@ -1,222 +0,0 @@ -Overview of Amiga Filesystems -============================= - -Not all varieties of the Amiga filesystems are supported for reading and -writing. The Amiga currently knows six different filesystems: - -DOS\0 The old or original filesystem, not really suited for - hard disks and normally not used on them, either. - Supported read/write. - -DOS\1 The original Fast File System. Supported read/write. - -DOS\2 The old "international" filesystem. International means that - a bug has been fixed so that accented ("international") letters - in file names are case-insensitive, as they ought to be. - Supported read/write. - -DOS\3 The "international" Fast File System. Supported read/write. - -DOS\4 The original filesystem with directory cache. The directory - cache speeds up directory accesses on floppies considerably, - but slows down file creation/deletion. Doesn't make much - sense on hard disks. Supported read only. - -DOS\5 The Fast File System with directory cache. Supported read only. - -All of the above filesystems allow block sizes from 512 to 32K bytes. -Supported block sizes are: 512, 1024, 2048 and 4096 bytes. Larger blocks -speed up almost everything at the expense of wasted disk space. The speed -gain above 4K seems not really worth the price, so you don't lose too -much here, either. - -The muFS (multi user File System) equivalents of the above file systems -are supported, too. - -Mount options for the AFFS -========================== - -protect If this option is set, the protection bits cannot be altered. - -setuid[=uid] This sets the owner of all files and directories in the file - system to uid or the uid of the current user, respectively. - -setgid[=gid] Same as above, but for gid. - -mode=mode Sets the mode flags to the given (octal) value, regardless - of the original permissions. Directories will get an x - permission if the corresponding r bit is set. - This is useful since most of the plain AmigaOS files - will map to 600. - -nofilenametruncate - The file system will return an error when filename exceeds - standard maximum filename length (30 characters). - -reserved=num Sets the number of reserved blocks at the start of the - partition to num. You should never need this option. - Default is 2. - -root=block Sets the block number of the root block. This should never - be necessary. - -bs=blksize Sets the blocksize to blksize. Valid block sizes are 512, - 1024, 2048 and 4096. Like the root option, this should - never be necessary, as the affs can figure it out itself. - -quiet The file system will not return an error for disallowed - mode changes. - -verbose The volume name, file system type and block size will - be written to the syslog when the filesystem is mounted. - -mufs The filesystem is really a muFS, also it doesn't - identify itself as one. This option is necessary if - the filesystem wasn't formatted as muFS, but is used - as one. - -prefix=path Path will be prefixed to every absolute path name of - symbolic links on an AFFS partition. Default = "/". - (See below.) - -volume=name When symbolic links with an absolute path are created - on an AFFS partition, name will be prepended as the - volume name. Default = "" (empty string). - (See below.) - -Handling of the Users/Groups and protection flags -================================================= - -Amiga -> Linux: - -The Amiga protection flags RWEDRWEDHSPARWED are handled as follows: - - - R maps to r for user, group and others. On directories, R implies x. - - - If both W and D are allowed, w will be set. - - - E maps to x. - - - H and P are always retained and ignored under Linux. - - - A is always reset when a file is written to. - -User id and group id will be used unless set[gu]id are given as mount -options. Since most of the Amiga file systems are single user systems -they will be owned by root. The root directory (the mount point) of the -Amiga filesystem will be owned by the user who actually mounts the -filesystem (the root directory doesn't have uid/gid fields). - -Linux -> Amiga: - -The Linux rwxrwxrwx file mode is handled as follows: - - - r permission will set R for user, group and others. - - - w permission will set W and D for user, group and others. - - - x permission of the user will set E for plain files. - - - All other flags (suid, sgid, ...) are ignored and will - not be retained. - -Newly created files and directories will get the user and group ID -of the current user and a mode according to the umask. - -Symbolic links -============== - -Although the Amiga and Linux file systems resemble each other, there -are some, not always subtle, differences. One of them becomes apparent -with symbolic links. While Linux has a file system with exactly one -root directory, the Amiga has a separate root directory for each -file system (for example, partition, floppy disk, ...). With the Amiga, -these entities are called "volumes". They have symbolic names which -can be used to access them. Thus, symbolic links can point to a -different volume. AFFS turns the volume name into a directory name -and prepends the prefix path (see prefix option) to it. - -Example: -You mount all your Amiga partitions under /amiga/ (where - is the name of the volume), and you give the option -"prefix=/amiga/" when mounting all your AFFS partitions. (They -might be "User", "WB" and "Graphics", the mount points /amiga/User, -/amiga/WB and /amiga/Graphics). A symbolic link referring to -"User:sc/include/dos/dos.h" will be followed to -"/amiga/User/sc/include/dos/dos.h". - -Examples -======== - -Command line: - mount Archive/Amiga/Workbench3.1.adf /mnt -t affs -o loop,verbose - mount /dev/sda3 /Amiga -t affs - -/etc/fstab entry: - /dev/sdb5 /amiga/Workbench affs noauto,user,exec,verbose 0 0 - -IMPORTANT NOTE -============== - -If you boot Windows 95 (don't know about 3.x, 98 and NT) while you -have an Amiga harddisk connected to your PC, it will overwrite -the bytes 0x00dc..0x00df of block 0 with garbage, thus invalidating -the Rigid Disk Block. Sheer luck has it that this is an unused -area of the RDB, so only the checksum doesn't match anymore. -Linux will ignore this garbage and recognize the RDB anyway, but -before you connect that drive to your Amiga again, you must -restore or repair your RDB. So please do make a backup copy of it -before booting Windows! - -If the damage is already done, the following should fix the RDB -(where is the device name). -DO AT YOUR OWN RISK: - - dd if=/dev/ of=rdb.tmp count=1 - cp rdb.tmp rdb.fixed - dd if=/dev/zero of=rdb.fixed bs=1 seek=220 count=4 - dd if=rdb.fixed of=/dev/ - -Bugs, Restrictions, Caveats -=========================== - -Quite a few things may not work as advertised. Not everything is -tested, though several hundred MB have been read and written using -this fs. For a most up-to-date list of bugs please consult -fs/affs/Changes. - -By default, filenames are truncated to 30 characters without warning. -'nofilenametruncate' mount option can change that behavior. - -Case is ignored by the affs in filename matching, but Linux shells -do care about the case. Example (with /wb being an affs mounted fs): - rm /wb/WRONGCASE -will remove /mnt/wrongcase, but - rm /wb/WR* -will not since the names are matched by the shell. - -The block allocation is designed for hard disk partitions. If more -than 1 process writes to a (small) diskette, the blocks are allocated -in an ugly way (but the real AFFS doesn't do much better). This -is also true when space gets tight. - -You cannot execute programs on an OFS (Old File System), since the -program files cannot be memory mapped due to the 488 byte blocks. -For the same reason you cannot mount an image on such a filesystem -via the loopback device. - -The bitmap valid flag in the root block may not be accurate when the -system crashes while an affs partition is mounted. There's currently -no way to fix a garbled filesystem without an Amiga (disk validator) -or manually (who would do this?). Maybe later. - -If you mount affs partitions on system startup, you may want to tell -fsck that the fs should not be checked (place a '0' in the sixth field -of /etc/fstab). - -It's not possible to read floppy disks with a normal PC or workstation -due to an incompatibility with the Amiga floppy controller. - -If you are interested in an Amiga Emulator for Linux, look at - -http://web.archive.org/web/*/http://www.freiburg.linux.de/~uae/ diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 14dc89c94822..273d802ad5fb 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -48,6 +48,7 @@ Documentation for filesystem implementations. 9p adfs + affs autofs fuse overlayfs -- cgit v1.2.3 From ca6e9049a0934fe72ffea6990c889205aff0a2cf Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:50 +0100 Subject: docs: filesystems: convert afs.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Comment out text-only ToC; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/d77f5afdb5da0f8b0ec3dbe720aef23f1ce73bb5.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/afs.rst | 251 +++++++++++++++++++++++++++++++++++ Documentation/filesystems/afs.txt | 258 ------------------------------------ Documentation/filesystems/index.rst | 1 + 3 files changed, 252 insertions(+), 258 deletions(-) create mode 100644 Documentation/filesystems/afs.rst delete mode 100644 Documentation/filesystems/afs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/afs.rst b/Documentation/filesystems/afs.rst new file mode 100644 index 000000000000..c4ec39a5966e --- /dev/null +++ b/Documentation/filesystems/afs.rst @@ -0,0 +1,251 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================== +kAFS: AFS FILESYSTEM +==================== + +.. Contents: + + - Overview. + - Usage. + - Mountpoints. + - Dynamic root. + - Proc filesystem. + - The cell database. + - Security. + - The @sys substitution. + + +Overview +======== + +This filesystem provides a fairly simple secure AFS filesystem driver. It is +under development and does not yet provide the full feature set. The features +it does support include: + + (*) Security (currently only AFS kaserver and KerberosIV tickets). + + (*) File reading and writing. + + (*) Automounting. + + (*) Local caching (via fscache). + +It does not yet support the following AFS features: + + (*) pioctl() system call. + + +Compilation +=========== + +The filesystem should be enabled by turning on the kernel configuration +options:: + + CONFIG_AF_RXRPC - The RxRPC protocol transport + CONFIG_RXKAD - The RxRPC Kerberos security handler + CONFIG_AFS - The AFS filesystem + +Additionally, the following can be turned on to aid debugging:: + + CONFIG_AF_RXRPC_DEBUG - Permit AF_RXRPC debugging to be enabled + CONFIG_AFS_DEBUG - Permit AFS debugging to be enabled + +They permit the debugging messages to be turned on dynamically by manipulating +the masks in the following files:: + + /sys/module/af_rxrpc/parameters/debug + /sys/module/kafs/parameters/debug + + +Usage +===== + +When inserting the driver modules the root cell must be specified along with a +list of volume location server IP addresses:: + + modprobe rxrpc + modprobe kafs rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91 + +The first module is the AF_RXRPC network protocol driver. This provides the +RxRPC remote operation protocol and may also be accessed from userspace. See: + + Documentation/networking/rxrpc.txt + +The second module is the kerberos RxRPC security driver, and the third module +is the actual filesystem driver for the AFS filesystem. + +Once the module has been loaded, more modules can be added by the following +procedure:: + + echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 >/proc/fs/afs/cells + +Where the parameters to the "add" command are the name of a cell and a list of +volume location servers within that cell, with the latter separated by colons. + +Filesystems can be mounted anywhere by commands similar to the following:: + + mount -t afs "%cambridge.redhat.com:root.afs." /afs + mount -t afs "#cambridge.redhat.com:root.cell." /afs/cambridge + mount -t afs "#root.afs." /afs + mount -t afs "#root.cell." /afs/cambridge + +Where the initial character is either a hash or a percent symbol depending on +whether you definitely want a R/W volume (percent) or whether you'd prefer a +R/O volume, but are willing to use a R/W volume instead (hash). + +The name of the volume can be suffixes with ".backup" or ".readonly" to +specify connection to only volumes of those types. + +The name of the cell is optional, and if not given during a mount, then the +named volume will be looked up in the cell specified during modprobe. + +Additional cells can be added through /proc (see later section). + + +Mountpoints +=========== + +AFS has a concept of mountpoints. In AFS terms, these are specially formatted +symbolic links (of the same form as the "device name" passed to mount). kAFS +presents these to the user as directories that have a follow-link capability +(ie: symbolic link semantics). If anyone attempts to access them, they will +automatically cause the target volume to be mounted (if possible) on that site. + +Automatically mounted filesystems will be automatically unmounted approximately +twenty minutes after they were last used. Alternatively they can be unmounted +directly with the umount() system call. + +Manually unmounting an AFS volume will cause any idle submounts upon it to be +culled first. If all are culled, then the requested volume will also be +unmounted, otherwise error EBUSY will be returned. + +This can be used by the administrator to attempt to unmount the whole AFS tree +mounted on /afs in one go by doing:: + + umount /afs + + +Dynamic Root +============ + +A mount option is available to create a serverless mount that is only usable +for dynamic lookup. Creating such a mount can be done by, for example:: + + mount -t afs none /afs -o dyn + +This creates a mount that just has an empty directory at the root. Attempting +to look up a name in this directory will cause a mountpoint to be created that +looks up a cell of the same name, for example:: + + ls /afs/grand.central.org/ + + +Proc Filesystem +=============== + +The AFS modules creates a "/proc/fs/afs/" directory and populates it: + + (*) A "cells" file that lists cells currently known to the afs module and + their usage counts:: + + [root@andromeda ~]# cat /proc/fs/afs/cells + USE NAME + 3 cambridge.redhat.com + + (*) A directory per cell that contains files that list volume location + servers, volumes, and active servers known within that cell:: + + [root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/servers + USE ADDR STATE + 4 172.16.18.91 0 + [root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/vlservers + ADDRESS + 172.16.18.91 + [root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/volumes + USE STT VLID[0] VLID[1] VLID[2] NAME + 1 Val 20000000 20000001 20000002 root.afs + + +The Cell Database +================= + +The filesystem maintains an internal database of all the cells it knows and the +IP addresses of the volume location servers for those cells. The cell to which +the system belongs is added to the database when modprobe is performed by the +"rootcell=" argument or, if compiled in, using a "kafs.rootcell=" argument on +the kernel command line. + +Further cells can be added by commands similar to the following:: + + echo add CELLNAME VLADDR[:VLADDR][:VLADDR]... >/proc/fs/afs/cells + echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 >/proc/fs/afs/cells + +No other cell database operations are available at this time. + + +Security +======== + +Secure operations are initiated by acquiring a key using the klog program. A +very primitive klog program is available at: + + http://people.redhat.com/~dhowells/rxrpc/klog.c + +This should be compiled by:: + + make klog LDLIBS="-lcrypto -lcrypt -lkrb4 -lkeyutils" + +And then run as:: + + ./klog + +Assuming it's successful, this adds a key of type RxRPC, named for the service +and cell, eg: "afs@". This can be viewed with the keyctl program or +by cat'ing /proc/keys:: + + [root@andromeda ~]# keyctl show + Session Keyring + -3 --alswrv 0 0 keyring: _ses.3268 + 2 --alswrv 0 0 \_ keyring: _uid.0 + 111416553 --als--v 0 0 \_ rxrpc: afs@CAMBRIDGE.REDHAT.COM + +Currently the username, realm, password and proposed ticket lifetime are +compiled in to the program. + +It is not required to acquire a key before using AFS facilities, but if one is +not acquired then all operations will be governed by the anonymous user parts +of the ACLs. + +If a key is acquired, then all AFS operations, including mounts and automounts, +made by a possessor of that key will be secured with that key. + +If a file is opened with a particular key and then the file descriptor is +passed to a process that doesn't have that key (perhaps over an AF_UNIX +socket), then the operations on the file will be made with key that was used to +open the file. + + +The @sys Substitution +===================== + +The list of up to 16 @sys substitutions for the current network namespace can +be configured by writing a list to /proc/fs/afs/sysname:: + + [root@andromeda ~]# echo foo amd64_linux_26 >/proc/fs/afs/sysname + +or cleared entirely by writing an empty list:: + + [root@andromeda ~]# echo >/proc/fs/afs/sysname + +The current list for current network namespace can be retrieved by:: + + [root@andromeda ~]# cat /proc/fs/afs/sysname + foo + amd64_linux_26 + +When @sys is being substituted for, each element of the list is tried in the +order given. + +By default, the list will contain one item that conforms to the pattern +"_linux_26", amd64 being the name for x86_64. diff --git a/Documentation/filesystems/afs.txt b/Documentation/filesystems/afs.txt deleted file mode 100644 index 8c6ea7b41048..000000000000 --- a/Documentation/filesystems/afs.txt +++ /dev/null @@ -1,258 +0,0 @@ - ==================== - kAFS: AFS FILESYSTEM - ==================== - -Contents: - - - Overview. - - Usage. - - Mountpoints. - - Dynamic root. - - Proc filesystem. - - The cell database. - - Security. - - The @sys substitution. - - -======== -OVERVIEW -======== - -This filesystem provides a fairly simple secure AFS filesystem driver. It is -under development and does not yet provide the full feature set. The features -it does support include: - - (*) Security (currently only AFS kaserver and KerberosIV tickets). - - (*) File reading and writing. - - (*) Automounting. - - (*) Local caching (via fscache). - -It does not yet support the following AFS features: - - (*) pioctl() system call. - - -=========== -COMPILATION -=========== - -The filesystem should be enabled by turning on the kernel configuration -options: - - CONFIG_AF_RXRPC - The RxRPC protocol transport - CONFIG_RXKAD - The RxRPC Kerberos security handler - CONFIG_AFS - The AFS filesystem - -Additionally, the following can be turned on to aid debugging: - - CONFIG_AF_RXRPC_DEBUG - Permit AF_RXRPC debugging to be enabled - CONFIG_AFS_DEBUG - Permit AFS debugging to be enabled - -They permit the debugging messages to be turned on dynamically by manipulating -the masks in the following files: - - /sys/module/af_rxrpc/parameters/debug - /sys/module/kafs/parameters/debug - - -===== -USAGE -===== - -When inserting the driver modules the root cell must be specified along with a -list of volume location server IP addresses: - - modprobe rxrpc - modprobe kafs rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91 - -The first module is the AF_RXRPC network protocol driver. This provides the -RxRPC remote operation protocol and may also be accessed from userspace. See: - - Documentation/networking/rxrpc.txt - -The second module is the kerberos RxRPC security driver, and the third module -is the actual filesystem driver for the AFS filesystem. - -Once the module has been loaded, more modules can be added by the following -procedure: - - echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 >/proc/fs/afs/cells - -Where the parameters to the "add" command are the name of a cell and a list of -volume location servers within that cell, with the latter separated by colons. - -Filesystems can be mounted anywhere by commands similar to the following: - - mount -t afs "%cambridge.redhat.com:root.afs." /afs - mount -t afs "#cambridge.redhat.com:root.cell." /afs/cambridge - mount -t afs "#root.afs." /afs - mount -t afs "#root.cell." /afs/cambridge - -Where the initial character is either a hash or a percent symbol depending on -whether you definitely want a R/W volume (percent) or whether you'd prefer a -R/O volume, but are willing to use a R/W volume instead (hash). - -The name of the volume can be suffixes with ".backup" or ".readonly" to -specify connection to only volumes of those types. - -The name of the cell is optional, and if not given during a mount, then the -named volume will be looked up in the cell specified during modprobe. - -Additional cells can be added through /proc (see later section). - - -=========== -MOUNTPOINTS -=========== - -AFS has a concept of mountpoints. In AFS terms, these are specially formatted -symbolic links (of the same form as the "device name" passed to mount). kAFS -presents these to the user as directories that have a follow-link capability -(ie: symbolic link semantics). If anyone attempts to access them, they will -automatically cause the target volume to be mounted (if possible) on that site. - -Automatically mounted filesystems will be automatically unmounted approximately -twenty minutes after they were last used. Alternatively they can be unmounted -directly with the umount() system call. - -Manually unmounting an AFS volume will cause any idle submounts upon it to be -culled first. If all are culled, then the requested volume will also be -unmounted, otherwise error EBUSY will be returned. - -This can be used by the administrator to attempt to unmount the whole AFS tree -mounted on /afs in one go by doing: - - umount /afs - - -============ -DYNAMIC ROOT -============ - -A mount option is available to create a serverless mount that is only usable -for dynamic lookup. Creating such a mount can be done by, for example: - - mount -t afs none /afs -o dyn - -This creates a mount that just has an empty directory at the root. Attempting -to look up a name in this directory will cause a mountpoint to be created that -looks up a cell of the same name, for example: - - ls /afs/grand.central.org/ - - -=============== -PROC FILESYSTEM -=============== - -The AFS modules creates a "/proc/fs/afs/" directory and populates it: - - (*) A "cells" file that lists cells currently known to the afs module and - their usage counts: - - [root@andromeda ~]# cat /proc/fs/afs/cells - USE NAME - 3 cambridge.redhat.com - - (*) A directory per cell that contains files that list volume location - servers, volumes, and active servers known within that cell. - - [root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/servers - USE ADDR STATE - 4 172.16.18.91 0 - [root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/vlservers - ADDRESS - 172.16.18.91 - [root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/volumes - USE STT VLID[0] VLID[1] VLID[2] NAME - 1 Val 20000000 20000001 20000002 root.afs - - -================= -THE CELL DATABASE -================= - -The filesystem maintains an internal database of all the cells it knows and the -IP addresses of the volume location servers for those cells. The cell to which -the system belongs is added to the database when modprobe is performed by the -"rootcell=" argument or, if compiled in, using a "kafs.rootcell=" argument on -the kernel command line. - -Further cells can be added by commands similar to the following: - - echo add CELLNAME VLADDR[:VLADDR][:VLADDR]... >/proc/fs/afs/cells - echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 >/proc/fs/afs/cells - -No other cell database operations are available at this time. - - -======== -SECURITY -======== - -Secure operations are initiated by acquiring a key using the klog program. A -very primitive klog program is available at: - - http://people.redhat.com/~dhowells/rxrpc/klog.c - -This should be compiled by: - - make klog LDLIBS="-lcrypto -lcrypt -lkrb4 -lkeyutils" - -And then run as: - - ./klog - -Assuming it's successful, this adds a key of type RxRPC, named for the service -and cell, eg: "afs@". This can be viewed with the keyctl program or -by cat'ing /proc/keys: - - [root@andromeda ~]# keyctl show - Session Keyring - -3 --alswrv 0 0 keyring: _ses.3268 - 2 --alswrv 0 0 \_ keyring: _uid.0 - 111416553 --als--v 0 0 \_ rxrpc: afs@CAMBRIDGE.REDHAT.COM - -Currently the username, realm, password and proposed ticket lifetime are -compiled in to the program. - -It is not required to acquire a key before using AFS facilities, but if one is -not acquired then all operations will be governed by the anonymous user parts -of the ACLs. - -If a key is acquired, then all AFS operations, including mounts and automounts, -made by a possessor of that key will be secured with that key. - -If a file is opened with a particular key and then the file descriptor is -passed to a process that doesn't have that key (perhaps over an AF_UNIX -socket), then the operations on the file will be made with key that was used to -open the file. - - -===================== -THE @SYS SUBSTITUTION -===================== - -The list of up to 16 @sys substitutions for the current network namespace can -be configured by writing a list to /proc/fs/afs/sysname: - - [root@andromeda ~]# echo foo amd64_linux_26 >/proc/fs/afs/sysname - -or cleared entirely by writing an empty list: - - [root@andromeda ~]# echo >/proc/fs/afs/sysname - -The current list for current network namespace can be retrieved by: - - [root@andromeda ~]# cat /proc/fs/afs/sysname - foo - amd64_linux_26 - -When @sys is being substituted for, each element of the list is tried in the -order given. - -By default, the list will contain one item that conforms to the pattern -"_linux_26", amd64 being the name for x86_64. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 273d802ad5fb..0598bc52abdc 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -49,6 +49,7 @@ Documentation for filesystem implementations. 9p adfs affs + afs autofs fuse overlayfs -- cgit v1.2.3 From c64d3dc69f38a08d082813f1c0425d7a108ef950 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:51 +0100 Subject: docs: filesystems: convert autofs-mount-control.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/8cae057ae244d0f5b58d3c209bcdae5ed82bc52c.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/autofs-mount-control.rst | 410 +++++++++++++++++++++ Documentation/filesystems/autofs-mount-control.txt | 408 -------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 411 insertions(+), 408 deletions(-) create mode 100644 Documentation/filesystems/autofs-mount-control.rst delete mode 100644 Documentation/filesystems/autofs-mount-control.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/autofs-mount-control.rst b/Documentation/filesystems/autofs-mount-control.rst new file mode 100644 index 000000000000..2903aed92316 --- /dev/null +++ b/Documentation/filesystems/autofs-mount-control.rst @@ -0,0 +1,410 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================================================================== +Miscellaneous Device control operations for the autofs kernel module +==================================================================== + +The problem +=========== + +There is a problem with active restarts in autofs (that is to say +restarting autofs when there are busy mounts). + +During normal operation autofs uses a file descriptor opened on the +directory that is being managed in order to be able to issue control +operations. Using a file descriptor gives ioctl operations access to +autofs specific information stored in the super block. The operations +are things such as setting an autofs mount catatonic, setting the +expire timeout and requesting expire checks. As is explained below, +certain types of autofs triggered mounts can end up covering an autofs +mount itself which prevents us being able to use open(2) to obtain a +file descriptor for these operations if we don't already have one open. + +Currently autofs uses "umount -l" (lazy umount) to clear active mounts +at restart. While using lazy umount works for most cases, anything that +needs to walk back up the mount tree to construct a path, such as +getcwd(2) and the proc file system /proc//cwd, no longer works +because the point from which the path is constructed has been detached +from the mount tree. + +The actual problem with autofs is that it can't reconnect to existing +mounts. Immediately one thinks of just adding the ability to remount +autofs file systems would solve it, but alas, that can't work. This is +because autofs direct mounts and the implementation of "on demand mount +and expire" of nested mount trees have the file system mounted directly +on top of the mount trigger directory dentry. + +For example, there are two types of automount maps, direct (in the kernel +module source you will see a third type called an offset, which is just +a direct mount in disguise) and indirect. + +Here is a master map with direct and indirect map entries:: + + /- /etc/auto.direct + /test /etc/auto.indirect + +and the corresponding map files:: + + /etc/auto.direct: + + /automount/dparse/g6 budgie:/autofs/export1 + /automount/dparse/g1 shark:/autofs/export1 + and so on. + +/etc/auto.indirect:: + + g1 shark:/autofs/export1 + g6 budgie:/autofs/export1 + and so on. + +For the above indirect map an autofs file system is mounted on /test and +mounts are triggered for each sub-directory key by the inode lookup +operation. So we see a mount of shark:/autofs/export1 on /test/g1, for +example. + +The way that direct mounts are handled is by making an autofs mount on +each full path, such as /automount/dparse/g1, and using it as a mount +trigger. So when we walk on the path we mount shark:/autofs/export1 "on +top of this mount point". Since these are always directories we can +use the follow_link inode operation to trigger the mount. + +But, each entry in direct and indirect maps can have offsets (making +them multi-mount map entries). + +For example, an indirect mount map entry could also be:: + + g1 \ + / shark:/autofs/export5/testing/test \ + /s1 shark:/autofs/export/testing/test/s1 \ + /s2 shark:/autofs/export5/testing/test/s2 \ + /s1/ss1 shark:/autofs/export1 \ + /s2/ss2 shark:/autofs/export2 + +and a similarly a direct mount map entry could also be:: + + /automount/dparse/g1 \ + / shark:/autofs/export5/testing/test \ + /s1 shark:/autofs/export/testing/test/s1 \ + /s2 shark:/autofs/export5/testing/test/s2 \ + /s1/ss1 shark:/autofs/export2 \ + /s2/ss2 shark:/autofs/export2 + +One of the issues with version 4 of autofs was that, when mounting an +entry with a large number of offsets, possibly with nesting, we needed +to mount and umount all of the offsets as a single unit. Not really a +problem, except for people with a large number of offsets in map entries. +This mechanism is used for the well known "hosts" map and we have seen +cases (in 2.4) where the available number of mounts are exhausted or +where the number of privileged ports available is exhausted. + +In version 5 we mount only as we go down the tree of offsets and +similarly for expiring them which resolves the above problem. There is +somewhat more detail to the implementation but it isn't needed for the +sake of the problem explanation. The one important detail is that these +offsets are implemented using the same mechanism as the direct mounts +above and so the mount points can be covered by a mount. + +The current autofs implementation uses an ioctl file descriptor opened +on the mount point for control operations. The references held by the +descriptor are accounted for in checks made to determine if a mount is +in use and is also used to access autofs file system information held +in the mount super block. So the use of a file handle needs to be +retained. + + +The Solution +============ + +To be able to restart autofs leaving existing direct, indirect and +offset mounts in place we need to be able to obtain a file handle +for these potentially covered autofs mount points. Rather than just +implement an isolated operation it was decided to re-implement the +existing ioctl interface and add new operations to provide this +functionality. + +In addition, to be able to reconstruct a mount tree that has busy mounts, +the uid and gid of the last user that triggered the mount needs to be +available because these can be used as macro substitution variables in +autofs maps. They are recorded at mount request time and an operation +has been added to retrieve them. + +Since we're re-implementing the control interface, a couple of other +problems with the existing interface have been addressed. First, when +a mount or expire operation completes a status is returned to the +kernel by either a "send ready" or a "send fail" operation. The +"send fail" operation of the ioctl interface could only ever send +ENOENT so the re-implementation allows user space to send an actual +status. Another expensive operation in user space, for those using +very large maps, is discovering if a mount is present. Usually this +involves scanning /proc/mounts and since it needs to be done quite +often it can introduce significant overhead when there are many entries +in the mount table. An operation to lookup the mount status of a mount +point dentry (covered or not) has also been added. + +Current kernel development policy recommends avoiding the use of the +ioctl mechanism in favor of systems such as Netlink. An implementation +using this system was attempted to evaluate its suitability and it was +found to be inadequate, in this case. The Generic Netlink system was +used for this as raw Netlink would lead to a significant increase in +complexity. There's no question that the Generic Netlink system is an +elegant solution for common case ioctl functions but it's not a complete +replacement probably because its primary purpose in life is to be a +message bus implementation rather than specifically an ioctl replacement. +While it would be possible to work around this there is one concern +that lead to the decision to not use it. This is that the autofs +expire in the daemon has become far to complex because umount +candidates are enumerated, almost for no other reason than to "count" +the number of times to call the expire ioctl. This involves scanning +the mount table which has proved to be a big overhead for users with +large maps. The best way to improve this is try and get back to the +way the expire was done long ago. That is, when an expire request is +issued for a mount (file handle) we should continually call back to +the daemon until we can't umount any more mounts, then return the +appropriate status to the daemon. At the moment we just expire one +mount at a time. A Generic Netlink implementation would exclude this +possibility for future development due to the requirements of the +message bus architecture. + + +autofs Miscellaneous Device mount control interface +==================================================== + +The control interface is opening a device node, typically /dev/autofs. + +All the ioctls use a common structure to pass the needed parameter +information and return operation results:: + + struct autofs_dev_ioctl { + __u32 ver_major; + __u32 ver_minor; + __u32 size; /* total size of data passed in + * including this struct */ + __s32 ioctlfd; /* automount command fd */ + + /* Command parameters */ + union { + struct args_protover protover; + struct args_protosubver protosubver; + struct args_openmount openmount; + struct args_ready ready; + struct args_fail fail; + struct args_setpipefd setpipefd; + struct args_timeout timeout; + struct args_requester requester; + struct args_expire expire; + struct args_askumount askumount; + struct args_ismountpoint ismountpoint; + }; + + char path[0]; + }; + +The ioctlfd field is a mount point file descriptor of an autofs mount +point. It is returned by the open call and is used by all calls except +the check for whether a given path is a mount point, where it may +optionally be used to check a specific mount corresponding to a given +mount point file descriptor, and when requesting the uid and gid of the +last successful mount on a directory within the autofs file system. + +The union is used to communicate parameters and results of calls made +as described below. + +The path field is used to pass a path where it is needed and the size field +is used account for the increased structure length when translating the +structure sent from user space. + +This structure can be initialized before setting specific fields by using +the void function call init_autofs_dev_ioctl(``struct autofs_dev_ioctl *``). + +All of the ioctls perform a copy of this structure from user space to +kernel space and return -EINVAL if the size parameter is smaller than +the structure size itself, -ENOMEM if the kernel memory allocation fails +or -EFAULT if the copy itself fails. Other checks include a version check +of the compiled in user space version against the module version and a +mismatch results in a -EINVAL return. If the size field is greater than +the structure size then a path is assumed to be present and is checked to +ensure it begins with a "/" and is NULL terminated, otherwise -EINVAL is +returned. Following these checks, for all ioctl commands except +AUTOFS_DEV_IOCTL_VERSION_CMD, AUTOFS_DEV_IOCTL_OPENMOUNT_CMD and +AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD the ioctlfd is validated and if it is +not a valid descriptor or doesn't correspond to an autofs mount point +an error of -EBADF, -ENOTTY or -EINVAL (not an autofs descriptor) is +returned. + + +The ioctls +========== + +An example of an implementation which uses this interface can be seen +in autofs version 5.0.4 and later in file lib/dev-ioctl-lib.c of the +distribution tar available for download from kernel.org in directory +/pub/linux/daemons/autofs/v5. + +The device node ioctl operations implemented by this interface are: + + +AUTOFS_DEV_IOCTL_VERSION +------------------------ + +Get the major and minor version of the autofs device ioctl kernel module +implementation. It requires an initialized struct autofs_dev_ioctl as an +input parameter and sets the version information in the passed in structure. +It returns 0 on success or the error -EINVAL if a version mismatch is +detected. + + +AUTOFS_DEV_IOCTL_PROTOVER_CMD and AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD +------------------------------------------------------------------ + +Get the major and minor version of the autofs protocol version understood +by loaded module. This call requires an initialized struct autofs_dev_ioctl +with the ioctlfd field set to a valid autofs mount point descriptor +and sets the requested version number in version field of struct args_protover +or sub_version field of struct args_protosubver. These commands return +0 on success or one of the negative error codes if validation fails. + + +AUTOFS_DEV_IOCTL_OPENMOUNT and AUTOFS_DEV_IOCTL_CLOSEMOUNT +---------------------------------------------------------- + +Obtain and release a file descriptor for an autofs managed mount point +path. The open call requires an initialized struct autofs_dev_ioctl with +the path field set and the size field adjusted appropriately as well +as the devid field of struct args_openmount set to the device number of +the autofs mount. The device number can be obtained from the mount options +shown in /proc/mounts. The close call requires an initialized struct +autofs_dev_ioct with the ioctlfd field set to the descriptor obtained +from the open call. The release of the file descriptor can also be done +with close(2) so any open descriptors will also be closed at process exit. +The close call is included in the implemented operations largely for +completeness and to provide for a consistent user space implementation. + + +AUTOFS_DEV_IOCTL_READY_CMD and AUTOFS_DEV_IOCTL_FAIL_CMD +-------------------------------------------------------- + +Return mount and expire result status from user space to the kernel. +Both of these calls require an initialized struct autofs_dev_ioctl +with the ioctlfd field set to the descriptor obtained from the open +call and the token field of struct args_ready or struct args_fail set +to the wait queue token number, received by user space in the foregoing +mount or expire request. The status field of struct args_fail is set to +the errno of the operation. It is set to 0 on success. + + +AUTOFS_DEV_IOCTL_SETPIPEFD_CMD +------------------------------ + +Set the pipe file descriptor used for kernel communication to the daemon. +Normally this is set at mount time using an option but when reconnecting +to a existing mount we need to use this to tell the autofs mount about +the new kernel pipe descriptor. In order to protect mounts against +incorrectly setting the pipe descriptor we also require that the autofs +mount be catatonic (see next call). + +The call requires an initialized struct autofs_dev_ioctl with the +ioctlfd field set to the descriptor obtained from the open call and +the pipefd field of struct args_setpipefd set to descriptor of the pipe. +On success the call also sets the process group id used to identify the +controlling process (eg. the owning automount(8) daemon) to the process +group of the caller. + + +AUTOFS_DEV_IOCTL_CATATONIC_CMD +------------------------------ + +Make the autofs mount point catatonic. The autofs mount will no longer +issue mount requests, the kernel communication pipe descriptor is released +and any remaining waits in the queue released. + +The call requires an initialized struct autofs_dev_ioctl with the +ioctlfd field set to the descriptor obtained from the open call. + + +AUTOFS_DEV_IOCTL_TIMEOUT_CMD +---------------------------- + +Set the expire timeout for mounts within an autofs mount point. + +The call requires an initialized struct autofs_dev_ioctl with the +ioctlfd field set to the descriptor obtained from the open call. + + +AUTOFS_DEV_IOCTL_REQUESTER_CMD +------------------------------ + +Return the uid and gid of the last process to successfully trigger a the +mount on the given path dentry. + +The call requires an initialized struct autofs_dev_ioctl with the path +field set to the mount point in question and the size field adjusted +appropriately. Upon return the uid field of struct args_requester contains +the uid and gid field the gid. + +When reconstructing an autofs mount tree with active mounts we need to +re-connect to mounts that may have used the original process uid and +gid (or string variations of them) for mount lookups within the map entry. +This call provides the ability to obtain this uid and gid so they may be +used by user space for the mount map lookups. + + +AUTOFS_DEV_IOCTL_EXPIRE_CMD +--------------------------- + +Issue an expire request to the kernel for an autofs mount. Typically +this ioctl is called until no further expire candidates are found. + +The call requires an initialized struct autofs_dev_ioctl with the +ioctlfd field set to the descriptor obtained from the open call. In +addition an immediate expire that's independent of the mount timeout, +and a forced expire that's independent of whether the mount is busy, +can be requested by setting the how field of struct args_expire to +AUTOFS_EXP_IMMEDIATE or AUTOFS_EXP_FORCED, respectively . If no +expire candidates can be found the ioctl returns -1 with errno set to +EAGAIN. + +This call causes the kernel module to check the mount corresponding +to the given ioctlfd for mounts that can be expired, issues an expire +request back to the daemon and waits for completion. + +AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD +------------------------------ + +Checks if an autofs mount point is in use. + +The call requires an initialized struct autofs_dev_ioctl with the +ioctlfd field set to the descriptor obtained from the open call and +it returns the result in the may_umount field of struct args_askumount, +1 for busy and 0 otherwise. + + +AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD +--------------------------------- + +Check if the given path is a mountpoint. + +The call requires an initialized struct autofs_dev_ioctl. There are two +possible variations. Both use the path field set to the path of the mount +point to check and the size field adjusted appropriately. One uses the +ioctlfd field to identify a specific mount point to check while the other +variation uses the path and optionally in.type field of struct args_ismountpoint +set to an autofs mount type. The call returns 1 if this is a mount point +and sets out.devid field to the device number of the mount and out.magic +field to the relevant super block magic number (described below) or 0 if +it isn't a mountpoint. In both cases the the device number (as returned +by new_encode_dev()) is returned in out.devid field. + +If supplied with a file descriptor we're looking for a specific mount, +not necessarily at the top of the mounted stack. In this case the path +the descriptor corresponds to is considered a mountpoint if it is itself +a mountpoint or contains a mount, such as a multi-mount without a root +mount. In this case we return 1 if the descriptor corresponds to a mount +point and and also returns the super magic of the covering mount if there +is one or 0 if it isn't a mountpoint. + +If a path is supplied (and the ioctlfd field is set to -1) then the path +is looked up and is checked to see if it is the root of a mount. If a +type is also given we are looking for a particular autofs mount and if +a match isn't found a fail is returned. If the the located path is the +root of a mount 1 is returned along with the super magic of the mount +or 0 otherwise. diff --git a/Documentation/filesystems/autofs-mount-control.txt b/Documentation/filesystems/autofs-mount-control.txt deleted file mode 100644 index acc02fc57993..000000000000 --- a/Documentation/filesystems/autofs-mount-control.txt +++ /dev/null @@ -1,408 +0,0 @@ - -Miscellaneous Device control operations for the autofs kernel module -==================================================================== - -The problem -=========== - -There is a problem with active restarts in autofs (that is to say -restarting autofs when there are busy mounts). - -During normal operation autofs uses a file descriptor opened on the -directory that is being managed in order to be able to issue control -operations. Using a file descriptor gives ioctl operations access to -autofs specific information stored in the super block. The operations -are things such as setting an autofs mount catatonic, setting the -expire timeout and requesting expire checks. As is explained below, -certain types of autofs triggered mounts can end up covering an autofs -mount itself which prevents us being able to use open(2) to obtain a -file descriptor for these operations if we don't already have one open. - -Currently autofs uses "umount -l" (lazy umount) to clear active mounts -at restart. While using lazy umount works for most cases, anything that -needs to walk back up the mount tree to construct a path, such as -getcwd(2) and the proc file system /proc//cwd, no longer works -because the point from which the path is constructed has been detached -from the mount tree. - -The actual problem with autofs is that it can't reconnect to existing -mounts. Immediately one thinks of just adding the ability to remount -autofs file systems would solve it, but alas, that can't work. This is -because autofs direct mounts and the implementation of "on demand mount -and expire" of nested mount trees have the file system mounted directly -on top of the mount trigger directory dentry. - -For example, there are two types of automount maps, direct (in the kernel -module source you will see a third type called an offset, which is just -a direct mount in disguise) and indirect. - -Here is a master map with direct and indirect map entries: - -/- /etc/auto.direct -/test /etc/auto.indirect - -and the corresponding map files: - -/etc/auto.direct: - -/automount/dparse/g6 budgie:/autofs/export1 -/automount/dparse/g1 shark:/autofs/export1 -and so on. - -/etc/auto.indirect: - -g1 shark:/autofs/export1 -g6 budgie:/autofs/export1 -and so on. - -For the above indirect map an autofs file system is mounted on /test and -mounts are triggered for each sub-directory key by the inode lookup -operation. So we see a mount of shark:/autofs/export1 on /test/g1, for -example. - -The way that direct mounts are handled is by making an autofs mount on -each full path, such as /automount/dparse/g1, and using it as a mount -trigger. So when we walk on the path we mount shark:/autofs/export1 "on -top of this mount point". Since these are always directories we can -use the follow_link inode operation to trigger the mount. - -But, each entry in direct and indirect maps can have offsets (making -them multi-mount map entries). - -For example, an indirect mount map entry could also be: - -g1 \ - / shark:/autofs/export5/testing/test \ - /s1 shark:/autofs/export/testing/test/s1 \ - /s2 shark:/autofs/export5/testing/test/s2 \ - /s1/ss1 shark:/autofs/export1 \ - /s2/ss2 shark:/autofs/export2 - -and a similarly a direct mount map entry could also be: - -/automount/dparse/g1 \ - / shark:/autofs/export5/testing/test \ - /s1 shark:/autofs/export/testing/test/s1 \ - /s2 shark:/autofs/export5/testing/test/s2 \ - /s1/ss1 shark:/autofs/export2 \ - /s2/ss2 shark:/autofs/export2 - -One of the issues with version 4 of autofs was that, when mounting an -entry with a large number of offsets, possibly with nesting, we needed -to mount and umount all of the offsets as a single unit. Not really a -problem, except for people with a large number of offsets in map entries. -This mechanism is used for the well known "hosts" map and we have seen -cases (in 2.4) where the available number of mounts are exhausted or -where the number of privileged ports available is exhausted. - -In version 5 we mount only as we go down the tree of offsets and -similarly for expiring them which resolves the above problem. There is -somewhat more detail to the implementation but it isn't needed for the -sake of the problem explanation. The one important detail is that these -offsets are implemented using the same mechanism as the direct mounts -above and so the mount points can be covered by a mount. - -The current autofs implementation uses an ioctl file descriptor opened -on the mount point for control operations. The references held by the -descriptor are accounted for in checks made to determine if a mount is -in use and is also used to access autofs file system information held -in the mount super block. So the use of a file handle needs to be -retained. - - -The Solution -============ - -To be able to restart autofs leaving existing direct, indirect and -offset mounts in place we need to be able to obtain a file handle -for these potentially covered autofs mount points. Rather than just -implement an isolated operation it was decided to re-implement the -existing ioctl interface and add new operations to provide this -functionality. - -In addition, to be able to reconstruct a mount tree that has busy mounts, -the uid and gid of the last user that triggered the mount needs to be -available because these can be used as macro substitution variables in -autofs maps. They are recorded at mount request time and an operation -has been added to retrieve them. - -Since we're re-implementing the control interface, a couple of other -problems with the existing interface have been addressed. First, when -a mount or expire operation completes a status is returned to the -kernel by either a "send ready" or a "send fail" operation. The -"send fail" operation of the ioctl interface could only ever send -ENOENT so the re-implementation allows user space to send an actual -status. Another expensive operation in user space, for those using -very large maps, is discovering if a mount is present. Usually this -involves scanning /proc/mounts and since it needs to be done quite -often it can introduce significant overhead when there are many entries -in the mount table. An operation to lookup the mount status of a mount -point dentry (covered or not) has also been added. - -Current kernel development policy recommends avoiding the use of the -ioctl mechanism in favor of systems such as Netlink. An implementation -using this system was attempted to evaluate its suitability and it was -found to be inadequate, in this case. The Generic Netlink system was -used for this as raw Netlink would lead to a significant increase in -complexity. There's no question that the Generic Netlink system is an -elegant solution for common case ioctl functions but it's not a complete -replacement probably because its primary purpose in life is to be a -message bus implementation rather than specifically an ioctl replacement. -While it would be possible to work around this there is one concern -that lead to the decision to not use it. This is that the autofs -expire in the daemon has become far to complex because umount -candidates are enumerated, almost for no other reason than to "count" -the number of times to call the expire ioctl. This involves scanning -the mount table which has proved to be a big overhead for users with -large maps. The best way to improve this is try and get back to the -way the expire was done long ago. That is, when an expire request is -issued for a mount (file handle) we should continually call back to -the daemon until we can't umount any more mounts, then return the -appropriate status to the daemon. At the moment we just expire one -mount at a time. A Generic Netlink implementation would exclude this -possibility for future development due to the requirements of the -message bus architecture. - - -autofs Miscellaneous Device mount control interface -==================================================== - -The control interface is opening a device node, typically /dev/autofs. - -All the ioctls use a common structure to pass the needed parameter -information and return operation results: - -struct autofs_dev_ioctl { - __u32 ver_major; - __u32 ver_minor; - __u32 size; /* total size of data passed in - * including this struct */ - __s32 ioctlfd; /* automount command fd */ - - /* Command parameters */ - union { - struct args_protover protover; - struct args_protosubver protosubver; - struct args_openmount openmount; - struct args_ready ready; - struct args_fail fail; - struct args_setpipefd setpipefd; - struct args_timeout timeout; - struct args_requester requester; - struct args_expire expire; - struct args_askumount askumount; - struct args_ismountpoint ismountpoint; - }; - - char path[0]; -}; - -The ioctlfd field is a mount point file descriptor of an autofs mount -point. It is returned by the open call and is used by all calls except -the check for whether a given path is a mount point, where it may -optionally be used to check a specific mount corresponding to a given -mount point file descriptor, and when requesting the uid and gid of the -last successful mount on a directory within the autofs file system. - -The union is used to communicate parameters and results of calls made -as described below. - -The path field is used to pass a path where it is needed and the size field -is used account for the increased structure length when translating the -structure sent from user space. - -This structure can be initialized before setting specific fields by using -the void function call init_autofs_dev_ioctl(struct autofs_dev_ioctl *). - -All of the ioctls perform a copy of this structure from user space to -kernel space and return -EINVAL if the size parameter is smaller than -the structure size itself, -ENOMEM if the kernel memory allocation fails -or -EFAULT if the copy itself fails. Other checks include a version check -of the compiled in user space version against the module version and a -mismatch results in a -EINVAL return. If the size field is greater than -the structure size then a path is assumed to be present and is checked to -ensure it begins with a "/" and is NULL terminated, otherwise -EINVAL is -returned. Following these checks, for all ioctl commands except -AUTOFS_DEV_IOCTL_VERSION_CMD, AUTOFS_DEV_IOCTL_OPENMOUNT_CMD and -AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD the ioctlfd is validated and if it is -not a valid descriptor or doesn't correspond to an autofs mount point -an error of -EBADF, -ENOTTY or -EINVAL (not an autofs descriptor) is -returned. - - -The ioctls -========== - -An example of an implementation which uses this interface can be seen -in autofs version 5.0.4 and later in file lib/dev-ioctl-lib.c of the -distribution tar available for download from kernel.org in directory -/pub/linux/daemons/autofs/v5. - -The device node ioctl operations implemented by this interface are: - - -AUTOFS_DEV_IOCTL_VERSION ------------------------- - -Get the major and minor version of the autofs device ioctl kernel module -implementation. It requires an initialized struct autofs_dev_ioctl as an -input parameter and sets the version information in the passed in structure. -It returns 0 on success or the error -EINVAL if a version mismatch is -detected. - - -AUTOFS_DEV_IOCTL_PROTOVER_CMD and AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD ------------------------------------------------------------------- - -Get the major and minor version of the autofs protocol version understood -by loaded module. This call requires an initialized struct autofs_dev_ioctl -with the ioctlfd field set to a valid autofs mount point descriptor -and sets the requested version number in version field of struct args_protover -or sub_version field of struct args_protosubver. These commands return -0 on success or one of the negative error codes if validation fails. - - -AUTOFS_DEV_IOCTL_OPENMOUNT and AUTOFS_DEV_IOCTL_CLOSEMOUNT ----------------------------------------------------------- - -Obtain and release a file descriptor for an autofs managed mount point -path. The open call requires an initialized struct autofs_dev_ioctl with -the path field set and the size field adjusted appropriately as well -as the devid field of struct args_openmount set to the device number of -the autofs mount. The device number can be obtained from the mount options -shown in /proc/mounts. The close call requires an initialized struct -autofs_dev_ioct with the ioctlfd field set to the descriptor obtained -from the open call. The release of the file descriptor can also be done -with close(2) so any open descriptors will also be closed at process exit. -The close call is included in the implemented operations largely for -completeness and to provide for a consistent user space implementation. - - -AUTOFS_DEV_IOCTL_READY_CMD and AUTOFS_DEV_IOCTL_FAIL_CMD --------------------------------------------------------- - -Return mount and expire result status from user space to the kernel. -Both of these calls require an initialized struct autofs_dev_ioctl -with the ioctlfd field set to the descriptor obtained from the open -call and the token field of struct args_ready or struct args_fail set -to the wait queue token number, received by user space in the foregoing -mount or expire request. The status field of struct args_fail is set to -the errno of the operation. It is set to 0 on success. - - -AUTOFS_DEV_IOCTL_SETPIPEFD_CMD ------------------------------- - -Set the pipe file descriptor used for kernel communication to the daemon. -Normally this is set at mount time using an option but when reconnecting -to a existing mount we need to use this to tell the autofs mount about -the new kernel pipe descriptor. In order to protect mounts against -incorrectly setting the pipe descriptor we also require that the autofs -mount be catatonic (see next call). - -The call requires an initialized struct autofs_dev_ioctl with the -ioctlfd field set to the descriptor obtained from the open call and -the pipefd field of struct args_setpipefd set to descriptor of the pipe. -On success the call also sets the process group id used to identify the -controlling process (eg. the owning automount(8) daemon) to the process -group of the caller. - - -AUTOFS_DEV_IOCTL_CATATONIC_CMD ------------------------------- - -Make the autofs mount point catatonic. The autofs mount will no longer -issue mount requests, the kernel communication pipe descriptor is released -and any remaining waits in the queue released. - -The call requires an initialized struct autofs_dev_ioctl with the -ioctlfd field set to the descriptor obtained from the open call. - - -AUTOFS_DEV_IOCTL_TIMEOUT_CMD ----------------------------- - -Set the expire timeout for mounts within an autofs mount point. - -The call requires an initialized struct autofs_dev_ioctl with the -ioctlfd field set to the descriptor obtained from the open call. - - -AUTOFS_DEV_IOCTL_REQUESTER_CMD ------------------------------- - -Return the uid and gid of the last process to successfully trigger a the -mount on the given path dentry. - -The call requires an initialized struct autofs_dev_ioctl with the path -field set to the mount point in question and the size field adjusted -appropriately. Upon return the uid field of struct args_requester contains -the uid and gid field the gid. - -When reconstructing an autofs mount tree with active mounts we need to -re-connect to mounts that may have used the original process uid and -gid (or string variations of them) for mount lookups within the map entry. -This call provides the ability to obtain this uid and gid so they may be -used by user space for the mount map lookups. - - -AUTOFS_DEV_IOCTL_EXPIRE_CMD ---------------------------- - -Issue an expire request to the kernel for an autofs mount. Typically -this ioctl is called until no further expire candidates are found. - -The call requires an initialized struct autofs_dev_ioctl with the -ioctlfd field set to the descriptor obtained from the open call. In -addition an immediate expire that's independent of the mount timeout, -and a forced expire that's independent of whether the mount is busy, -can be requested by setting the how field of struct args_expire to -AUTOFS_EXP_IMMEDIATE or AUTOFS_EXP_FORCED, respectively . If no -expire candidates can be found the ioctl returns -1 with errno set to -EAGAIN. - -This call causes the kernel module to check the mount corresponding -to the given ioctlfd for mounts that can be expired, issues an expire -request back to the daemon and waits for completion. - -AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD ------------------------------- - -Checks if an autofs mount point is in use. - -The call requires an initialized struct autofs_dev_ioctl with the -ioctlfd field set to the descriptor obtained from the open call and -it returns the result in the may_umount field of struct args_askumount, -1 for busy and 0 otherwise. - - -AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD ---------------------------------- - -Check if the given path is a mountpoint. - -The call requires an initialized struct autofs_dev_ioctl. There are two -possible variations. Both use the path field set to the path of the mount -point to check and the size field adjusted appropriately. One uses the -ioctlfd field to identify a specific mount point to check while the other -variation uses the path and optionally in.type field of struct args_ismountpoint -set to an autofs mount type. The call returns 1 if this is a mount point -and sets out.devid field to the device number of the mount and out.magic -field to the relevant super block magic number (described below) or 0 if -it isn't a mountpoint. In both cases the the device number (as returned -by new_encode_dev()) is returned in out.devid field. - -If supplied with a file descriptor we're looking for a specific mount, -not necessarily at the top of the mounted stack. In this case the path -the descriptor corresponds to is considered a mountpoint if it is itself -a mountpoint or contains a mount, such as a multi-mount without a root -mount. In this case we return 1 if the descriptor corresponds to a mount -point and and also returns the super magic of the covering mount if there -is one or 0 if it isn't a mountpoint. - -If a path is supplied (and the ioctlfd field is set to -1) then the path -is looked up and is checked to see if it is the root of a mount. If a -type is also given we are looking for a particular autofs mount and if -a match isn't found a fail is returned. If the the located path is the -root of a mount 1 is returned along with the super magic of the mount -or 0 otherwise. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 0598bc52abdc..c9480138d47e 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -51,6 +51,7 @@ Documentation for filesystem implementations. affs afs autofs + autofs-mount-control fuse overlayfs virtiofs -- cgit v1.2.3 From c54ad9a4e8faf080e6b395cc4b8298dfc5170255 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:52 +0100 Subject: docs: filesystems: convert befs.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/3e29ea6df6cd569021cfa953ccb8ed7dfc146f3d.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/befs.rst | 128 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/befs.txt | 117 -------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 129 insertions(+), 117 deletions(-) create mode 100644 Documentation/filesystems/befs.rst delete mode 100644 Documentation/filesystems/befs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/befs.rst b/Documentation/filesystems/befs.rst new file mode 100644 index 000000000000..79f9740d76ff --- /dev/null +++ b/Documentation/filesystems/befs.rst @@ -0,0 +1,128 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================= +BeOS filesystem for Linux +========================= + +Document last updated: Dec 6, 2001 + +Warning +======= +Make sure you understand that this is alpha software. This means that the +implementation is neither complete nor well-tested. + +I DISCLAIM ALL RESPONSIBILITY FOR ANY POSSIBLE BAD EFFECTS OF THIS CODE! + +License +======= +This software is covered by the GNU General Public License. +See the file COPYING for the complete text of the license. +Or the GNU website: + +Author +====== +The largest part of the code written by Will Dyson +He has been working on the code since Aug 13, 2001. See the changelog for +details. + +Original Author: Makoto Kato + +His original code can still be found at: + + +Does anyone know of a more current email address for Makoto? He doesn't +respond to the address given above... + +This filesystem doesn't have a maintainer. + +What is this Driver? +==================== +This module implements the native filesystem of BeOS http://www.beincorporated.com/ +for the linux 2.4.1 and later kernels. Currently it is a read-only +implementation. + +Which is it, BFS or BEFS? +========================= +Be, Inc said, "BeOS Filesystem is officially called BFS, not BeFS". +But Unixware Boot Filesystem is called bfs, too. And they are already in +the kernel. Because of this naming conflict, on Linux the BeOS +filesystem is called befs. + +How to Install +============== +step 1. Install the BeFS patch into the source code tree of linux. + +Apply the patchfile to your kernel source tree. +Assuming that your kernel source is in /foo/bar/linux and the patchfile +is called patch-befs-xxx, you would do the following: + + cd /foo/bar/linux + patch -p1 < /path/to/patch-befs-xxx + +if the patching step fails (i.e. there are rejected hunks), you can try to +figure it out yourself (it shouldn't be hard), or mail the maintainer +(Will Dyson ) for help. + +step 2. Configuration & make kernel + +The linux kernel has many compile-time options. Most of them are beyond the +scope of this document. I suggest the Kernel-HOWTO document as a good general +reference on this topic. http://www.linuxdocs.org/HOWTOs/Kernel-HOWTO-4.html + +However, to use the BeFS module, you must enable it at configure time:: + + cd /foo/bar/linux + make menuconfig (or xconfig) + +The BeFS module is not a standard part of the linux kernel, so you must first +enable support for experimental code under the "Code maturity level" menu. + +Then, under the "Filesystems" menu will be an option called "BeFS +filesystem (experimental)", or something like that. Enable that option +(it is fine to make it a module). + +Save your kernel configuration and then build your kernel. + +step 3. Install + +See the kernel howto for +instructions on this critical step. + +Using BFS +========= +To use the BeOS filesystem, use filesystem type 'befs'. + +ex:: + + mount -t befs /dev/fd0 /beos + +Mount Options +============= + +============= =========================================================== +uid=nnn All files in the partition will be owned by user id nnn. +gid=nnn All files in the partition will be in group nnn. +iocharset=xxx Use xxx as the name of the NLS translation table. +debug The driver will output debugging information to the syslog. +============= =========================================================== + +How to Get Lastest Version +========================== + +The latest version is currently available at: + + +Any Known Bugs? +=============== +As of Jan 20, 2002: + + None + +Special Thanks +============== +Dominic Giampalo ... Writing "Practical file system design with Be filesystem" + +Hiroyuki Yamada ... Testing LinuxPPC. + + + diff --git a/Documentation/filesystems/befs.txt b/Documentation/filesystems/befs.txt deleted file mode 100644 index da45e6c842b8..000000000000 --- a/Documentation/filesystems/befs.txt +++ /dev/null @@ -1,117 +0,0 @@ -BeOS filesystem for Linux - -Document last updated: Dec 6, 2001 - -WARNING -======= -Make sure you understand that this is alpha software. This means that the -implementation is neither complete nor well-tested. - -I DISCLAIM ALL RESPONSIBILITY FOR ANY POSSIBLE BAD EFFECTS OF THIS CODE! - -LICENSE -===== -This software is covered by the GNU General Public License. -See the file COPYING for the complete text of the license. -Or the GNU website: - -AUTHOR -===== -The largest part of the code written by Will Dyson -He has been working on the code since Aug 13, 2001. See the changelog for -details. - -Original Author: Makoto Kato -His original code can still be found at: - -Does anyone know of a more current email address for Makoto? He doesn't -respond to the address given above... - -This filesystem doesn't have a maintainer. - -WHAT IS THIS DRIVER? -================== -This module implements the native filesystem of BeOS http://www.beincorporated.com/ -for the linux 2.4.1 and later kernels. Currently it is a read-only -implementation. - -Which is it, BFS or BEFS? -================ -Be, Inc said, "BeOS Filesystem is officially called BFS, not BeFS". -But Unixware Boot Filesystem is called bfs, too. And they are already in -the kernel. Because of this naming conflict, on Linux the BeOS -filesystem is called befs. - -HOW TO INSTALL -============== -step 1. Install the BeFS patch into the source code tree of linux. - -Apply the patchfile to your kernel source tree. -Assuming that your kernel source is in /foo/bar/linux and the patchfile -is called patch-befs-xxx, you would do the following: - - cd /foo/bar/linux - patch -p1 < /path/to/patch-befs-xxx - -if the patching step fails (i.e. there are rejected hunks), you can try to -figure it out yourself (it shouldn't be hard), or mail the maintainer -(Will Dyson ) for help. - -step 2. Configuration & make kernel - -The linux kernel has many compile-time options. Most of them are beyond the -scope of this document. I suggest the Kernel-HOWTO document as a good general -reference on this topic. http://www.linuxdocs.org/HOWTOs/Kernel-HOWTO-4.html - -However, to use the BeFS module, you must enable it at configure time. - - cd /foo/bar/linux - make menuconfig (or xconfig) - -The BeFS module is not a standard part of the linux kernel, so you must first -enable support for experimental code under the "Code maturity level" menu. - -Then, under the "Filesystems" menu will be an option called "BeFS -filesystem (experimental)", or something like that. Enable that option -(it is fine to make it a module). - -Save your kernel configuration and then build your kernel. - -step 3. Install - -See the kernel howto for -instructions on this critical step. - -USING BFS -========= -To use the BeOS filesystem, use filesystem type 'befs'. - -ex) - mount -t befs /dev/fd0 /beos - -MOUNT OPTIONS -============= -uid=nnn All files in the partition will be owned by user id nnn. -gid=nnn All files in the partition will be in group nnn. -iocharset=xxx Use xxx as the name of the NLS translation table. -debug The driver will output debugging information to the syslog. - -HOW TO GET LASTEST VERSION -========================== - -The latest version is currently available at: - - -ANY KNOWN BUGS? -=========== -As of Jan 20, 2002: - - None - -SPECIAL THANKS -============== -Dominic Giampalo ... Writing "Practical file system design with Be filesystem" -Hiroyuki Yamada ... Testing LinuxPPC. - - - diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index c9480138d47e..98de437f5500 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -52,6 +52,7 @@ Documentation for filesystem implementations. afs autofs autofs-mount-control + befs fuse overlayfs virtiofs -- cgit v1.2.3 From ee68f34d7e7e553ffb74f09df0f3764fbfcf5d4b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:53 +0100 Subject: docs: filesystems: convert bfs.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/93991bcc05e419368ee1e585c81057fb2c7c8d2b.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/bfs.rst | 60 +++++++++++++++++++++++++++++++++++++ Documentation/filesystems/bfs.txt | 57 ----------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 61 insertions(+), 57 deletions(-) create mode 100644 Documentation/filesystems/bfs.rst delete mode 100644 Documentation/filesystems/bfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/bfs.rst b/Documentation/filesystems/bfs.rst new file mode 100644 index 000000000000..ce14b9018807 --- /dev/null +++ b/Documentation/filesystems/bfs.rst @@ -0,0 +1,60 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================== +BFS Filesystem for Linux +======================== + +The BFS filesystem is used by SCO UnixWare OS for the /stand slice, which +usually contains the kernel image and a few other files required for the +boot process. + +In order to access /stand partition under Linux you obviously need to +know the partition number and the kernel must support UnixWare disk slices +(CONFIG_UNIXWARE_DISKLABEL config option). However BFS support does not +depend on having UnixWare disklabel support because one can also mount +BFS filesystem via loopback:: + + # losetup /dev/loop0 stand.img + # mount -t bfs /dev/loop0 /mnt/stand + +where stand.img is a file containing the image of BFS filesystem. +When you have finished using it and umounted you need to also deallocate +/dev/loop0 device by:: + + # losetup -d /dev/loop0 + +You can simplify mounting by just typing:: + + # mount -t bfs -o loop stand.img /mnt/stand + +this will allocate the first available loopback device (and load loop.o +kernel module if necessary) automatically. If the loopback driver is not +loaded automatically, make sure that you have compiled the module and +that modprobe is functioning. Beware that umount will not deallocate +/dev/loopN device if /etc/mtab file on your system is a symbolic link to +/proc/mounts. You will need to do it manually using "-d" switch of +losetup(8). Read losetup(8) manpage for more info. + +To create the BFS image under UnixWare you need to find out first which +slice contains it. The command prtvtoc(1M) is your friend:: + + # prtvtoc /dev/rdsk/c0b0t0d0s0 + +(assuming your root disk is on target=0, lun=0, bus=0, controller=0). Then you +look for the slice with tag "STAND", which is usually slice 10. With this +information you can use dd(1) to create the BFS image:: + + # umount /stand + # dd if=/dev/rdsk/c0b0t0d0sa of=stand.img bs=512 + +Just in case, you can verify that you have done the right thing by checking +the magic number:: + + # od -Ad -tx4 stand.img | more + +The first 4 bytes should be 0x1badface. + +If you have any patches, questions or suggestions regarding this BFS +implementation please contact the author: + +Tigran Aivazian diff --git a/Documentation/filesystems/bfs.txt b/Documentation/filesystems/bfs.txt deleted file mode 100644 index 843ce91a2e40..000000000000 --- a/Documentation/filesystems/bfs.txt +++ /dev/null @@ -1,57 +0,0 @@ -BFS FILESYSTEM FOR LINUX -======================== - -The BFS filesystem is used by SCO UnixWare OS for the /stand slice, which -usually contains the kernel image and a few other files required for the -boot process. - -In order to access /stand partition under Linux you obviously need to -know the partition number and the kernel must support UnixWare disk slices -(CONFIG_UNIXWARE_DISKLABEL config option). However BFS support does not -depend on having UnixWare disklabel support because one can also mount -BFS filesystem via loopback: - -# losetup /dev/loop0 stand.img -# mount -t bfs /dev/loop0 /mnt/stand - -where stand.img is a file containing the image of BFS filesystem. -When you have finished using it and umounted you need to also deallocate -/dev/loop0 device by: - -# losetup -d /dev/loop0 - -You can simplify mounting by just typing: - -# mount -t bfs -o loop stand.img /mnt/stand - -this will allocate the first available loopback device (and load loop.o -kernel module if necessary) automatically. If the loopback driver is not -loaded automatically, make sure that you have compiled the module and -that modprobe is functioning. Beware that umount will not deallocate -/dev/loopN device if /etc/mtab file on your system is a symbolic link to -/proc/mounts. You will need to do it manually using "-d" switch of -losetup(8). Read losetup(8) manpage for more info. - -To create the BFS image under UnixWare you need to find out first which -slice contains it. The command prtvtoc(1M) is your friend: - -# prtvtoc /dev/rdsk/c0b0t0d0s0 - -(assuming your root disk is on target=0, lun=0, bus=0, controller=0). Then you -look for the slice with tag "STAND", which is usually slice 10. With this -information you can use dd(1) to create the BFS image: - -# umount /stand -# dd if=/dev/rdsk/c0b0t0d0sa of=stand.img bs=512 - -Just in case, you can verify that you have done the right thing by checking -the magic number: - -# od -Ad -tx4 stand.img | more - -The first 4 bytes should be 0x1badface. - -If you have any patches, questions or suggestions regarding this BFS -implementation please contact the author: - -Tigran Aivazian diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 98de437f5500..f74e6b273d9f 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -53,6 +53,7 @@ Documentation for filesystem implementations. autofs autofs-mount-control befs + bfs fuse overlayfs virtiofs -- cgit v1.2.3 From 5d43e1bc2dfccbb07ea662fa4536544f1b6efd43 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:54 +0100 Subject: docs: filesystems: convert btrfs.txt to ReST Just trivial changes: - Add a SPDX header; - Add it to filesystems/index.rst. While here, adjust document title, just to make it use the same style of the other docs. Signed-off-by: Mauro Carvalho Chehab Acked-by: David Sterba Link: https://lore.kernel.org/r/1ef76da4ac24a9a6f6187723554733c702ea19ae.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/btrfs.rst | 34 ++++++++++++++++++++++++++++++++++ Documentation/filesystems/btrfs.txt | 31 ------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 35 insertions(+), 31 deletions(-) create mode 100644 Documentation/filesystems/btrfs.rst delete mode 100644 Documentation/filesystems/btrfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/btrfs.rst b/Documentation/filesystems/btrfs.rst new file mode 100644 index 000000000000..d0904f602819 --- /dev/null +++ b/Documentation/filesystems/btrfs.rst @@ -0,0 +1,34 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===== +BTRFS +===== + +Btrfs is a copy on write filesystem for Linux aimed at implementing advanced +features while focusing on fault tolerance, repair and easy administration. +Jointly developed by several companies, licensed under the GPL and open for +contribution from anyone. + +The main Btrfs features include: + + * Extent based file storage (2^64 max file size) + * Space efficient packing of small files + * Space efficient indexed directories + * Dynamic inode allocation + * Writable snapshots + * Subvolumes (separate internal filesystem roots) + * Object level mirroring and striping + * Checksums on data and metadata (multiple algorithms available) + * Compression + * Integrated multiple device support, with several raid algorithms + * Offline filesystem check + * Efficient incremental backup and FS mirroring + * Online filesystem defragmentation + +For more information please refer to the wiki + + https://btrfs.wiki.kernel.org + +that maintains information about administration tasks, frequently asked +questions, use cases, mount options, comprehensible changelogs, features, +manual pages, source code repositories, contacts etc. diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt deleted file mode 100644 index f9dad22d95ce..000000000000 --- a/Documentation/filesystems/btrfs.txt +++ /dev/null @@ -1,31 +0,0 @@ -BTRFS -===== - -Btrfs is a copy on write filesystem for Linux aimed at implementing advanced -features while focusing on fault tolerance, repair and easy administration. -Jointly developed by several companies, licensed under the GPL and open for -contribution from anyone. - -The main Btrfs features include: - - * Extent based file storage (2^64 max file size) - * Space efficient packing of small files - * Space efficient indexed directories - * Dynamic inode allocation - * Writable snapshots - * Subvolumes (separate internal filesystem roots) - * Object level mirroring and striping - * Checksums on data and metadata (multiple algorithms available) - * Compression - * Integrated multiple device support, with several raid algorithms - * Offline filesystem check - * Efficient incremental backup and FS mirroring - * Online filesystem defragmentation - -For more information please refer to the wiki - - https://btrfs.wiki.kernel.org - -that maintains information about administration tasks, frequently asked -questions, use cases, mount options, comprehensible changelogs, features, -manual pages, source code repositories, contacts etc. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index f74e6b273d9f..dae862cf167e 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -54,6 +54,7 @@ Documentation for filesystem implementations. autofs-mount-control befs bfs + btrfs fuse overlayfs virtiofs -- cgit v1.2.3 From 471379a174aa444b326d1b74e9f96a8b4b766b79 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:55 +0100 Subject: docs: filesystems: convert ceph.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: Jeff Layton Link: https://lore.kernel.org/r/df2f142b5ca5842e030d8209482dfd62dcbe020f.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/ceph.rst | 190 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/ceph.txt | 186 ----------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 191 insertions(+), 186 deletions(-) create mode 100644 Documentation/filesystems/ceph.rst delete mode 100644 Documentation/filesystems/ceph.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/ceph.rst b/Documentation/filesystems/ceph.rst new file mode 100644 index 000000000000..b46a7218248f --- /dev/null +++ b/Documentation/filesystems/ceph.rst @@ -0,0 +1,190 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================ +Ceph Distributed File System +============================ + +Ceph is a distributed network file system designed to provide good +performance, reliability, and scalability. + +Basic features include: + + * POSIX semantics + * Seamless scaling from 1 to many thousands of nodes + * High availability and reliability. No single point of failure. + * N-way replication of data across storage nodes + * Fast recovery from node failures + * Automatic rebalancing of data on node addition/removal + * Easy deployment: most FS components are userspace daemons + +Also, + + * Flexible snapshots (on any directory) + * Recursive accounting (nested files, directories, bytes) + +In contrast to cluster filesystems like GFS, OCFS2, and GPFS that rely +on symmetric access by all clients to shared block devices, Ceph +separates data and metadata management into independent server +clusters, similar to Lustre. Unlike Lustre, however, metadata and +storage nodes run entirely as user space daemons. File data is striped +across storage nodes in large chunks to distribute workload and +facilitate high throughputs. When storage nodes fail, data is +re-replicated in a distributed fashion by the storage nodes themselves +(with some minimal coordination from a cluster monitor), making the +system extremely efficient and scalable. + +Metadata servers effectively form a large, consistent, distributed +in-memory cache above the file namespace that is extremely scalable, +dynamically redistributes metadata in response to workload changes, +and can tolerate arbitrary (well, non-Byzantine) node failures. The +metadata server takes a somewhat unconventional approach to metadata +storage to significantly improve performance for common workloads. In +particular, inodes with only a single link are embedded in +directories, allowing entire directories of dentries and inodes to be +loaded into its cache with a single I/O operation. The contents of +extremely large directories can be fragmented and managed by +independent metadata servers, allowing scalable concurrent access. + +The system offers automatic data rebalancing/migration when scaling +from a small cluster of just a few nodes to many hundreds, without +requiring an administrator carve the data set into static volumes or +go through the tedious process of migrating data between servers. +When the file system approaches full, new nodes can be easily added +and things will "just work." + +Ceph includes flexible snapshot mechanism that allows a user to create +a snapshot on any subdirectory (and its nested contents) in the +system. Snapshot creation and deletion are as simple as 'mkdir +.snap/foo' and 'rmdir .snap/foo'. + +Ceph also provides some recursive accounting on directories for nested +files and bytes. That is, a 'getfattr -d foo' on any directory in the +system will reveal the total number of nested regular files and +subdirectories, and a summation of all nested file sizes. This makes +the identification of large disk space consumers relatively quick, as +no 'du' or similar recursive scan of the file system is required. + +Finally, Ceph also allows quotas to be set on any directory in the system. +The quota can restrict the number of bytes or the number of files stored +beneath that point in the directory hierarchy. Quotas can be set using +extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', eg:: + + setfattr -n ceph.quota.max_bytes -v 100000000 /some/dir + getfattr -n ceph.quota.max_bytes /some/dir + +A limitation of the current quotas implementation is that it relies on the +cooperation of the client mounting the file system to stop writers when a +limit is reached. A modified or adversarial client cannot be prevented +from writing as much data as it needs. + +Mount Syntax +============ + +The basic mount syntax is:: + + # mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt + +You only need to specify a single monitor, as the client will get the +full list when it connects. (However, if the monitor you specify +happens to be down, the mount won't succeed.) The port can be left +off if the monitor is using the default. So if the monitor is at +1.2.3.4:: + + # mount -t ceph 1.2.3.4:/ /mnt/ceph + +is sufficient. If /sbin/mount.ceph is installed, a hostname can be +used instead of an IP address. + + + +Mount Options +============= + + ip=A.B.C.D[:N] + Specify the IP and/or port the client should bind to locally. + There is normally not much reason to do this. If the IP is not + specified, the client's IP address is determined by looking at the + address its connection to the monitor originates from. + + wsize=X + Specify the maximum write size in bytes. Default: 16 MB. + + rsize=X + Specify the maximum read size in bytes. Default: 16 MB. + + rasize=X + Specify the maximum readahead size in bytes. Default: 8 MB. + + mount_timeout=X + Specify the timeout value for mount (in seconds), in the case + of a non-responsive Ceph file system. The default is 30 + seconds. + + caps_max=X + Specify the maximum number of caps to hold. Unused caps are released + when number of caps exceeds the limit. The default is 0 (no limit) + + rbytes + When stat() is called on a directory, set st_size to 'rbytes', + the summation of file sizes over all files nested beneath that + directory. This is the default. + + norbytes + When stat() is called on a directory, set st_size to the + number of entries in that directory. + + nocrc + Disable CRC32C calculation for data writes. If set, the storage node + must rely on TCP's error correction to detect data corruption + in the data payload. + + dcache + Use the dcache contents to perform negative lookups and + readdir when the client has the entire directory contents in + its cache. (This does not change correctness; the client uses + cached metadata only when a lease or capability ensures it is + valid.) + + nodcache + Do not use the dcache as above. This avoids a significant amount of + complex code, sacrificing performance without affecting correctness, + and is useful for tracking down bugs. + + noasyncreaddir + Do not use the dcache as above for readdir. + + noquotadf + Report overall filesystem usage in statfs instead of using the root + directory quota. + + nocopyfrom + Don't use the RADOS 'copy-from' operation to perform remote object + copies. Currently, it's only used in copy_file_range, which will revert + to the default VFS implementation if this option is used. + + recover_session= + Set auto reconnect mode in the case where the client is blacklisted. The + available modes are "no" and "clean". The default is "no". + + * no: never attempt to reconnect when client detects that it has been + blacklisted. Operations will generally fail after being blacklisted. + + * clean: client reconnects to the ceph cluster automatically when it + detects that it has been blacklisted. During reconnect, client drops + dirty data/metadata, invalidates page caches and writable file handles. + After reconnect, file locks become stale because the MDS loses track + of them. If an inode contains any stale file locks, read/write on the + inode is not allowed until applications release all stale file locks. + +More Information +================ + +For more information on Ceph, see the home page at + https://ceph.com/ + +The Linux kernel client source tree is available at + - https://github.com/ceph/ceph-client.git + - git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git + +and the source for the full system is at + https://github.com/ceph/ceph.git diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt deleted file mode 100644 index b19b6a03f91c..000000000000 --- a/Documentation/filesystems/ceph.txt +++ /dev/null @@ -1,186 +0,0 @@ -Ceph Distributed File System -============================ - -Ceph is a distributed network file system designed to provide good -performance, reliability, and scalability. - -Basic features include: - - * POSIX semantics - * Seamless scaling from 1 to many thousands of nodes - * High availability and reliability. No single point of failure. - * N-way replication of data across storage nodes - * Fast recovery from node failures - * Automatic rebalancing of data on node addition/removal - * Easy deployment: most FS components are userspace daemons - -Also, - * Flexible snapshots (on any directory) - * Recursive accounting (nested files, directories, bytes) - -In contrast to cluster filesystems like GFS, OCFS2, and GPFS that rely -on symmetric access by all clients to shared block devices, Ceph -separates data and metadata management into independent server -clusters, similar to Lustre. Unlike Lustre, however, metadata and -storage nodes run entirely as user space daemons. File data is striped -across storage nodes in large chunks to distribute workload and -facilitate high throughputs. When storage nodes fail, data is -re-replicated in a distributed fashion by the storage nodes themselves -(with some minimal coordination from a cluster monitor), making the -system extremely efficient and scalable. - -Metadata servers effectively form a large, consistent, distributed -in-memory cache above the file namespace that is extremely scalable, -dynamically redistributes metadata in response to workload changes, -and can tolerate arbitrary (well, non-Byzantine) node failures. The -metadata server takes a somewhat unconventional approach to metadata -storage to significantly improve performance for common workloads. In -particular, inodes with only a single link are embedded in -directories, allowing entire directories of dentries and inodes to be -loaded into its cache with a single I/O operation. The contents of -extremely large directories can be fragmented and managed by -independent metadata servers, allowing scalable concurrent access. - -The system offers automatic data rebalancing/migration when scaling -from a small cluster of just a few nodes to many hundreds, without -requiring an administrator carve the data set into static volumes or -go through the tedious process of migrating data between servers. -When the file system approaches full, new nodes can be easily added -and things will "just work." - -Ceph includes flexible snapshot mechanism that allows a user to create -a snapshot on any subdirectory (and its nested contents) in the -system. Snapshot creation and deletion are as simple as 'mkdir -.snap/foo' and 'rmdir .snap/foo'. - -Ceph also provides some recursive accounting on directories for nested -files and bytes. That is, a 'getfattr -d foo' on any directory in the -system will reveal the total number of nested regular files and -subdirectories, and a summation of all nested file sizes. This makes -the identification of large disk space consumers relatively quick, as -no 'du' or similar recursive scan of the file system is required. - -Finally, Ceph also allows quotas to be set on any directory in the system. -The quota can restrict the number of bytes or the number of files stored -beneath that point in the directory hierarchy. Quotas can be set using -extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', eg: - - setfattr -n ceph.quota.max_bytes -v 100000000 /some/dir - getfattr -n ceph.quota.max_bytes /some/dir - -A limitation of the current quotas implementation is that it relies on the -cooperation of the client mounting the file system to stop writers when a -limit is reached. A modified or adversarial client cannot be prevented -from writing as much data as it needs. - -Mount Syntax -============ - -The basic mount syntax is: - - # mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt - -You only need to specify a single monitor, as the client will get the -full list when it connects. (However, if the monitor you specify -happens to be down, the mount won't succeed.) The port can be left -off if the monitor is using the default. So if the monitor is at -1.2.3.4, - - # mount -t ceph 1.2.3.4:/ /mnt/ceph - -is sufficient. If /sbin/mount.ceph is installed, a hostname can be -used instead of an IP address. - - - -Mount Options -============= - - ip=A.B.C.D[:N] - Specify the IP and/or port the client should bind to locally. - There is normally not much reason to do this. If the IP is not - specified, the client's IP address is determined by looking at the - address its connection to the monitor originates from. - - wsize=X - Specify the maximum write size in bytes. Default: 16 MB. - - rsize=X - Specify the maximum read size in bytes. Default: 16 MB. - - rasize=X - Specify the maximum readahead size in bytes. Default: 8 MB. - - mount_timeout=X - Specify the timeout value for mount (in seconds), in the case - of a non-responsive Ceph file system. The default is 30 - seconds. - - caps_max=X - Specify the maximum number of caps to hold. Unused caps are released - when number of caps exceeds the limit. The default is 0 (no limit) - - rbytes - When stat() is called on a directory, set st_size to 'rbytes', - the summation of file sizes over all files nested beneath that - directory. This is the default. - - norbytes - When stat() is called on a directory, set st_size to the - number of entries in that directory. - - nocrc - Disable CRC32C calculation for data writes. If set, the storage node - must rely on TCP's error correction to detect data corruption - in the data payload. - - dcache - Use the dcache contents to perform negative lookups and - readdir when the client has the entire directory contents in - its cache. (This does not change correctness; the client uses - cached metadata only when a lease or capability ensures it is - valid.) - - nodcache - Do not use the dcache as above. This avoids a significant amount of - complex code, sacrificing performance without affecting correctness, - and is useful for tracking down bugs. - - noasyncreaddir - Do not use the dcache as above for readdir. - - noquotadf - Report overall filesystem usage in statfs instead of using the root - directory quota. - - nocopyfrom - Don't use the RADOS 'copy-from' operation to perform remote object - copies. Currently, it's only used in copy_file_range, which will revert - to the default VFS implementation if this option is used. - - recover_session= - Set auto reconnect mode in the case where the client is blacklisted. The - available modes are "no" and "clean". The default is "no". - - * no: never attempt to reconnect when client detects that it has been - blacklisted. Operations will generally fail after being blacklisted. - - * clean: client reconnects to the ceph cluster automatically when it - detects that it has been blacklisted. During reconnect, client drops - dirty data/metadata, invalidates page caches and writable file handles. - After reconnect, file locks become stale because the MDS loses track - of them. If an inode contains any stale file locks, read/write on the - inode is not allowed until applications release all stale file locks. - -More Information -================ - -For more information on Ceph, see the home page at - https://ceph.com/ - -The Linux kernel client source tree is available at - https://github.com/ceph/ceph-client.git - git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git - -and the source for the full system is at - https://github.com/ceph/ceph.git diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index dae862cf167e..ddd8f7b2bb25 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -55,6 +55,7 @@ Documentation for filesystem implementations. befs bfs btrfs + ceph fuse overlayfs virtiofs -- cgit v1.2.3 From f1fa0e6028d395c5f0d1a0929a795b8dc0d43295 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:56 +0100 Subject: docs: filesystems: convert cramfs.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: Nicolas Pitre Link: https://lore.kernel.org/r/e87b267e71f99974b7bb3fc0a4a08454ff58165e.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/cramfs.rst | 123 +++++++++++++++++++++++++++++++++++ Documentation/filesystems/cramfs.txt | 118 --------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 124 insertions(+), 118 deletions(-) create mode 100644 Documentation/filesystems/cramfs.rst delete mode 100644 Documentation/filesystems/cramfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/cramfs.rst b/Documentation/filesystems/cramfs.rst new file mode 100644 index 000000000000..afbdbde98bd2 --- /dev/null +++ b/Documentation/filesystems/cramfs.rst @@ -0,0 +1,123 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================================== +Cramfs - cram a filesystem onto a small ROM +=========================================== + +cramfs is designed to be simple and small, and to compress things well. + +It uses the zlib routines to compress a file one page at a time, and +allows random page access. The meta-data is not compressed, but is +expressed in a very terse representation to make it use much less +diskspace than traditional filesystems. + +You can't write to a cramfs filesystem (making it compressible and +compact also makes it _very_ hard to update on-the-fly), so you have to +create the disk image with the "mkcramfs" utility. + + +Usage Notes +----------- + +File sizes are limited to less than 16MB. + +Maximum filesystem size is a little over 256MB. (The last file on the +filesystem is allowed to extend past 256MB.) + +Only the low 8 bits of gid are stored. The current version of +mkcramfs simply truncates to 8 bits, which is a potential security +issue. + +Hard links are supported, but hard linked files +will still have a link count of 1 in the cramfs image. + +Cramfs directories have no ``.`` or ``..`` entries. Directories (like +every other file on cramfs) always have a link count of 1. (There's +no need to use -noleaf in ``find``, btw.) + +No timestamps are stored in a cramfs, so these default to the epoch +(1970 GMT). Recently-accessed files may have updated timestamps, but +the update lasts only as long as the inode is cached in memory, after +which the timestamp reverts to 1970, i.e. moves backwards in time. + +Currently, cramfs must be written and read with architectures of the +same endianness, and can be read only by kernels with PAGE_SIZE +== 4096. At least the latter of these is a bug, but it hasn't been +decided what the best fix is. For the moment if you have larger pages +you can just change the #define in mkcramfs.c, so long as you don't +mind the filesystem becoming unreadable to future kernels. + + +Memory Mapped cramfs image +-------------------------- + +The CRAMFS_MTD Kconfig option adds support for loading data directly from +a physical linear memory range (usually non volatile memory like Flash) +instead of going through the block device layer. This saves some memory +since no intermediate buffering is necessary to hold the data before +decompressing. + +And when data blocks are kept uncompressed and properly aligned, they will +automatically be mapped directly into user space whenever possible providing +eXecute-In-Place (XIP) from ROM of read-only segments. Data segments mapped +read-write (hence they have to be copied to RAM) may still be compressed in +the cramfs image in the same file along with non compressed read-only +segments. Both MMU and no-MMU systems are supported. This is particularly +handy for tiny embedded systems with very tight memory constraints. + +The location of the cramfs image in memory is system dependent. You must +know the proper physical address where the cramfs image is located and +configure an MTD device for it. Also, that MTD device must be supported +by a map driver that implements the "point" method. Examples of such +MTD drivers are cfi_cmdset_0001 (Intel/Sharp CFI flash) or physmap +(Flash device in physical memory map). MTD partitions based on such devices +are fine too. Then that device should be specified with the "mtd:" prefix +as the mount device argument. For example, to mount the MTD device named +"fs_partition" on the /mnt directory:: + + $ mount -t cramfs mtd:fs_partition /mnt + +To boot a kernel with this as root filesystem, suffice to specify +something like "root=mtd:fs_partition" on the kernel command line. + + +Tools +----- + +A version of mkcramfs that can take advantage of the latest capabilities +described above can be found here: + +https://github.com/npitre/cramfs-tools + + +For /usr/share/magic +-------------------- + +===== ======================= ======================= +0 ulelong 0x28cd3d45 Linux cramfs offset 0 +>4 ulelong x size %d +>8 ulelong x flags 0x%x +>12 ulelong x future 0x%x +>16 string >\0 signature "%.16s" +>32 ulelong x fsid.crc 0x%x +>36 ulelong x fsid.edition %d +>40 ulelong x fsid.blocks %d +>44 ulelong x fsid.files %d +>48 string >\0 name "%.16s" +512 ulelong 0x28cd3d45 Linux cramfs offset 512 +>516 ulelong x size %d +>520 ulelong x flags 0x%x +>524 ulelong x future 0x%x +>528 string >\0 signature "%.16s" +>544 ulelong x fsid.crc 0x%x +>548 ulelong x fsid.edition %d +>552 ulelong x fsid.blocks %d +>556 ulelong x fsid.files %d +>560 string >\0 name "%.16s" +===== ======================= ======================= + + +Hacker Notes +------------ + +See fs/cramfs/README for filesystem layout and implementation notes. diff --git a/Documentation/filesystems/cramfs.txt b/Documentation/filesystems/cramfs.txt deleted file mode 100644 index 8e19a53d648b..000000000000 --- a/Documentation/filesystems/cramfs.txt +++ /dev/null @@ -1,118 +0,0 @@ - - Cramfs - cram a filesystem onto a small ROM - -cramfs is designed to be simple and small, and to compress things well. - -It uses the zlib routines to compress a file one page at a time, and -allows random page access. The meta-data is not compressed, but is -expressed in a very terse representation to make it use much less -diskspace than traditional filesystems. - -You can't write to a cramfs filesystem (making it compressible and -compact also makes it _very_ hard to update on-the-fly), so you have to -create the disk image with the "mkcramfs" utility. - - -Usage Notes ------------ - -File sizes are limited to less than 16MB. - -Maximum filesystem size is a little over 256MB. (The last file on the -filesystem is allowed to extend past 256MB.) - -Only the low 8 bits of gid are stored. The current version of -mkcramfs simply truncates to 8 bits, which is a potential security -issue. - -Hard links are supported, but hard linked files -will still have a link count of 1 in the cramfs image. - -Cramfs directories have no `.' or `..' entries. Directories (like -every other file on cramfs) always have a link count of 1. (There's -no need to use -noleaf in `find', btw.) - -No timestamps are stored in a cramfs, so these default to the epoch -(1970 GMT). Recently-accessed files may have updated timestamps, but -the update lasts only as long as the inode is cached in memory, after -which the timestamp reverts to 1970, i.e. moves backwards in time. - -Currently, cramfs must be written and read with architectures of the -same endianness, and can be read only by kernels with PAGE_SIZE -== 4096. At least the latter of these is a bug, but it hasn't been -decided what the best fix is. For the moment if you have larger pages -you can just change the #define in mkcramfs.c, so long as you don't -mind the filesystem becoming unreadable to future kernels. - - -Memory Mapped cramfs image --------------------------- - -The CRAMFS_MTD Kconfig option adds support for loading data directly from -a physical linear memory range (usually non volatile memory like Flash) -instead of going through the block device layer. This saves some memory -since no intermediate buffering is necessary to hold the data before -decompressing. - -And when data blocks are kept uncompressed and properly aligned, they will -automatically be mapped directly into user space whenever possible providing -eXecute-In-Place (XIP) from ROM of read-only segments. Data segments mapped -read-write (hence they have to be copied to RAM) may still be compressed in -the cramfs image in the same file along with non compressed read-only -segments. Both MMU and no-MMU systems are supported. This is particularly -handy for tiny embedded systems with very tight memory constraints. - -The location of the cramfs image in memory is system dependent. You must -know the proper physical address where the cramfs image is located and -configure an MTD device for it. Also, that MTD device must be supported -by a map driver that implements the "point" method. Examples of such -MTD drivers are cfi_cmdset_0001 (Intel/Sharp CFI flash) or physmap -(Flash device in physical memory map). MTD partitions based on such devices -are fine too. Then that device should be specified with the "mtd:" prefix -as the mount device argument. For example, to mount the MTD device named -"fs_partition" on the /mnt directory: - -$ mount -t cramfs mtd:fs_partition /mnt - -To boot a kernel with this as root filesystem, suffice to specify -something like "root=mtd:fs_partition" on the kernel command line. - - -Tools ------ - -A version of mkcramfs that can take advantage of the latest capabilities -described above can be found here: - -https://github.com/npitre/cramfs-tools - - -For /usr/share/magic --------------------- - -0 ulelong 0x28cd3d45 Linux cramfs offset 0 ->4 ulelong x size %d ->8 ulelong x flags 0x%x ->12 ulelong x future 0x%x ->16 string >\0 signature "%.16s" ->32 ulelong x fsid.crc 0x%x ->36 ulelong x fsid.edition %d ->40 ulelong x fsid.blocks %d ->44 ulelong x fsid.files %d ->48 string >\0 name "%.16s" -512 ulelong 0x28cd3d45 Linux cramfs offset 512 ->516 ulelong x size %d ->520 ulelong x flags 0x%x ->524 ulelong x future 0x%x ->528 string >\0 signature "%.16s" ->544 ulelong x fsid.crc 0x%x ->548 ulelong x fsid.edition %d ->552 ulelong x fsid.blocks %d ->556 ulelong x fsid.files %d ->560 string >\0 name "%.16s" - - -Hacker Notes ------------- - -See fs/cramfs/README for filesystem layout and implementation notes. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index ddd8f7b2bb25..8fe848ea04af 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -56,6 +56,7 @@ Documentation for filesystem implementations. bfs btrfs ceph + cramfs fuse overlayfs virtiofs -- cgit v1.2.3 From 57443789849cd79e66488301a01f01c6340942ce Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:57 +0100 Subject: docs: filesystems: convert debugfs.txt to ReST - Add a SPDX header; - Use copyright symbol; - Add a document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Use footnoote markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/42db8f9db17a5d8b619130815ae63d1615951d50.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/debugfs.rst | 247 ++++++++++++++++++++++++++++++++++ Documentation/filesystems/debugfs.txt | 241 --------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 248 insertions(+), 241 deletions(-) create mode 100644 Documentation/filesystems/debugfs.rst delete mode 100644 Documentation/filesystems/debugfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/debugfs.rst b/Documentation/filesystems/debugfs.rst new file mode 100644 index 000000000000..c89d2d335dfb --- /dev/null +++ b/Documentation/filesystems/debugfs.rst @@ -0,0 +1,247 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +======= +DebugFS +======= + +Copyright |copy| 2009 Jonathan Corbet + +Debugfs exists as a simple way for kernel developers to make information +available to user space. Unlike /proc, which is only meant for information +about a process, or sysfs, which has strict one-value-per-file rules, +debugfs has no rules at all. Developers can put any information they want +there. The debugfs filesystem is also intended to not serve as a stable +ABI to user space; in theory, there are no stability constraints placed on +files exported there. The real world is not always so simple, though [1]_; +even debugfs interfaces are best designed with the idea that they will need +to be maintained forever. + +Debugfs is typically mounted with a command like:: + + mount -t debugfs none /sys/kernel/debug + +(Or an equivalent /etc/fstab line). +The debugfs root directory is accessible only to the root user by +default. To change access to the tree the "uid", "gid" and "mode" mount +options can be used. + +Note that the debugfs API is exported GPL-only to modules. + +Code using debugfs should include . Then, the first order +of business will be to create at least one directory to hold a set of +debugfs files:: + + struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); + +This call, if successful, will make a directory called name underneath the +indicated parent directory. If parent is NULL, the directory will be +created in the debugfs root. On success, the return value is a struct +dentry pointer which can be used to create files in the directory (and to +clean it up at the end). An ERR_PTR(-ERROR) return value indicates that +something went wrong. If ERR_PTR(-ENODEV) is returned, that is an +indication that the kernel has been built without debugfs support and none +of the functions described below will work. + +The most general way to create a file within a debugfs directory is with:: + + struct dentry *debugfs_create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops); + +Here, name is the name of the file to create, mode describes the access +permissions the file should have, parent indicates the directory which +should hold the file, data will be stored in the i_private field of the +resulting inode structure, and fops is a set of file operations which +implement the file's behavior. At a minimum, the read() and/or write() +operations should be provided; others can be included as needed. Again, +the return value will be a dentry pointer to the created file, +ERR_PTR(-ERROR) on error, or ERR_PTR(-ENODEV) if debugfs support is +missing. + +Create a file with an initial size, the following function can be used +instead:: + + struct dentry *debugfs_create_file_size(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops, + loff_t file_size); + +file_size is the initial file size. The other parameters are the same +as the function debugfs_create_file. + +In a number of cases, the creation of a set of file operations is not +actually necessary; the debugfs code provides a number of helper functions +for simple situations. Files containing a single integer value can be +created with any of:: + + void debugfs_create_u8(const char *name, umode_t mode, + struct dentry *parent, u8 *value); + void debugfs_create_u16(const char *name, umode_t mode, + struct dentry *parent, u16 *value); + struct dentry *debugfs_create_u32(const char *name, umode_t mode, + struct dentry *parent, u32 *value); + void debugfs_create_u64(const char *name, umode_t mode, + struct dentry *parent, u64 *value); + +These files support both reading and writing the given value; if a specific +file should not be written to, simply set the mode bits accordingly. The +values in these files are in decimal; if hexadecimal is more appropriate, +the following functions can be used instead:: + + void debugfs_create_x8(const char *name, umode_t mode, + struct dentry *parent, u8 *value); + void debugfs_create_x16(const char *name, umode_t mode, + struct dentry *parent, u16 *value); + void debugfs_create_x32(const char *name, umode_t mode, + struct dentry *parent, u32 *value); + void debugfs_create_x64(const char *name, umode_t mode, + struct dentry *parent, u64 *value); + +These functions are useful as long as the developer knows the size of the +value to be exported. Some types can have different widths on different +architectures, though, complicating the situation somewhat. There are +functions meant to help out in such special cases:: + + void debugfs_create_size_t(const char *name, umode_t mode, + struct dentry *parent, size_t *value); + +As might be expected, this function will create a debugfs file to represent +a variable of type size_t. + +Similarly, there are helpers for variables of type unsigned long, in decimal +and hexadecimal:: + + struct dentry *debugfs_create_ulong(const char *name, umode_t mode, + struct dentry *parent, + unsigned long *value); + void debugfs_create_xul(const char *name, umode_t mode, + struct dentry *parent, unsigned long *value); + +Boolean values can be placed in debugfs with:: + + struct dentry *debugfs_create_bool(const char *name, umode_t mode, + struct dentry *parent, bool *value); + +A read on the resulting file will yield either Y (for non-zero values) or +N, followed by a newline. If written to, it will accept either upper- or +lower-case values, or 1 or 0. Any other input will be silently ignored. + +Also, atomic_t values can be placed in debugfs with:: + + void debugfs_create_atomic_t(const char *name, umode_t mode, + struct dentry *parent, atomic_t *value) + +A read of this file will get atomic_t values, and a write of this file +will set atomic_t values. + +Another option is exporting a block of arbitrary binary data, with +this structure and function:: + + struct debugfs_blob_wrapper { + void *data; + unsigned long size; + }; + + struct dentry *debugfs_create_blob(const char *name, umode_t mode, + struct dentry *parent, + struct debugfs_blob_wrapper *blob); + +A read of this file will return the data pointed to by the +debugfs_blob_wrapper structure. Some drivers use "blobs" as a simple way +to return several lines of (static) formatted text output. This function +can be used to export binary information, but there does not appear to be +any code which does so in the mainline. Note that all files created with +debugfs_create_blob() are read-only. + +If you want to dump a block of registers (something that happens quite +often during development, even if little such code reaches mainline. +Debugfs offers two functions: one to make a registers-only file, and +another to insert a register block in the middle of another sequential +file:: + + struct debugfs_reg32 { + char *name; + unsigned long offset; + }; + + struct debugfs_regset32 { + struct debugfs_reg32 *regs; + int nregs; + void __iomem *base; + }; + + struct dentry *debugfs_create_regset32(const char *name, umode_t mode, + struct dentry *parent, + struct debugfs_regset32 *regset); + + void debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs, + int nregs, void __iomem *base, char *prefix); + +The "base" argument may be 0, but you may want to build the reg32 array +using __stringify, and a number of register names (macros) are actually +byte offsets over a base for the register block. + +If you want to dump an u32 array in debugfs, you can create file with:: + + void debugfs_create_u32_array(const char *name, umode_t mode, + struct dentry *parent, + u32 *array, u32 elements); + +The "array" argument provides data, and the "elements" argument is +the number of elements in the array. Note: Once array is created its +size can not be changed. + +There is a helper function to create device related seq_file:: + + struct dentry *debugfs_create_devm_seqfile(struct device *dev, + const char *name, + struct dentry *parent, + int (*read_fn)(struct seq_file *s, + void *data)); + +The "dev" argument is the device related to this debugfs file, and +the "read_fn" is a function pointer which to be called to print the +seq_file content. + +There are a couple of other directory-oriented helper functions:: + + struct dentry *debugfs_rename(struct dentry *old_dir, + struct dentry *old_dentry, + struct dentry *new_dir, + const char *new_name); + + struct dentry *debugfs_create_symlink(const char *name, + struct dentry *parent, + const char *target); + +A call to debugfs_rename() will give a new name to an existing debugfs +file, possibly in a different directory. The new_name must not exist prior +to the call; the return value is old_dentry with updated information. +Symbolic links can be created with debugfs_create_symlink(). + +There is one important thing that all debugfs users must take into account: +there is no automatic cleanup of any directories created in debugfs. If a +module is unloaded without explicitly removing debugfs entries, the result +will be a lot of stale pointers and no end of highly antisocial behavior. +So all debugfs users - at least those which can be built as modules - must +be prepared to remove all files and directories they create there. A file +can be removed with:: + + void debugfs_remove(struct dentry *dentry); + +The dentry value can be NULL or an error value, in which case nothing will +be removed. + +Once upon a time, debugfs users were required to remember the dentry +pointer for every debugfs file they created so that all files could be +cleaned up. We live in more civilized times now, though, and debugfs users +can call:: + + void debugfs_remove_recursive(struct dentry *dentry); + +If this function is passed a pointer for the dentry corresponding to the +top-level directory, the entire hierarchy below that directory will be +removed. + +.. [1] http://lwn.net/Articles/309298/ diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt deleted file mode 100644 index dc497b96fa4f..000000000000 --- a/Documentation/filesystems/debugfs.txt +++ /dev/null @@ -1,241 +0,0 @@ -Copyright 2009 Jonathan Corbet - -Debugfs exists as a simple way for kernel developers to make information -available to user space. Unlike /proc, which is only meant for information -about a process, or sysfs, which has strict one-value-per-file rules, -debugfs has no rules at all. Developers can put any information they want -there. The debugfs filesystem is also intended to not serve as a stable -ABI to user space; in theory, there are no stability constraints placed on -files exported there. The real world is not always so simple, though [1]; -even debugfs interfaces are best designed with the idea that they will need -to be maintained forever. - -Debugfs is typically mounted with a command like: - - mount -t debugfs none /sys/kernel/debug - -(Or an equivalent /etc/fstab line). -The debugfs root directory is accessible only to the root user by -default. To change access to the tree the "uid", "gid" and "mode" mount -options can be used. - -Note that the debugfs API is exported GPL-only to modules. - -Code using debugfs should include . Then, the first order -of business will be to create at least one directory to hold a set of -debugfs files: - - struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); - -This call, if successful, will make a directory called name underneath the -indicated parent directory. If parent is NULL, the directory will be -created in the debugfs root. On success, the return value is a struct -dentry pointer which can be used to create files in the directory (and to -clean it up at the end). An ERR_PTR(-ERROR) return value indicates that -something went wrong. If ERR_PTR(-ENODEV) is returned, that is an -indication that the kernel has been built without debugfs support and none -of the functions described below will work. - -The most general way to create a file within a debugfs directory is with: - - struct dentry *debugfs_create_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops); - -Here, name is the name of the file to create, mode describes the access -permissions the file should have, parent indicates the directory which -should hold the file, data will be stored in the i_private field of the -resulting inode structure, and fops is a set of file operations which -implement the file's behavior. At a minimum, the read() and/or write() -operations should be provided; others can be included as needed. Again, -the return value will be a dentry pointer to the created file, -ERR_PTR(-ERROR) on error, or ERR_PTR(-ENODEV) if debugfs support is -missing. - -Create a file with an initial size, the following function can be used -instead: - - struct dentry *debugfs_create_file_size(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops, - loff_t file_size); - -file_size is the initial file size. The other parameters are the same -as the function debugfs_create_file. - -In a number of cases, the creation of a set of file operations is not -actually necessary; the debugfs code provides a number of helper functions -for simple situations. Files containing a single integer value can be -created with any of: - - void debugfs_create_u8(const char *name, umode_t mode, - struct dentry *parent, u8 *value); - void debugfs_create_u16(const char *name, umode_t mode, - struct dentry *parent, u16 *value); - struct dentry *debugfs_create_u32(const char *name, umode_t mode, - struct dentry *parent, u32 *value); - void debugfs_create_u64(const char *name, umode_t mode, - struct dentry *parent, u64 *value); - -These files support both reading and writing the given value; if a specific -file should not be written to, simply set the mode bits accordingly. The -values in these files are in decimal; if hexadecimal is more appropriate, -the following functions can be used instead: - - void debugfs_create_x8(const char *name, umode_t mode, - struct dentry *parent, u8 *value); - void debugfs_create_x16(const char *name, umode_t mode, - struct dentry *parent, u16 *value); - void debugfs_create_x32(const char *name, umode_t mode, - struct dentry *parent, u32 *value); - void debugfs_create_x64(const char *name, umode_t mode, - struct dentry *parent, u64 *value); - -These functions are useful as long as the developer knows the size of the -value to be exported. Some types can have different widths on different -architectures, though, complicating the situation somewhat. There are -functions meant to help out in such special cases: - - void debugfs_create_size_t(const char *name, umode_t mode, - struct dentry *parent, size_t *value); - -As might be expected, this function will create a debugfs file to represent -a variable of type size_t. - -Similarly, there are helpers for variables of type unsigned long, in decimal -and hexadecimal: - - struct dentry *debugfs_create_ulong(const char *name, umode_t mode, - struct dentry *parent, - unsigned long *value); - void debugfs_create_xul(const char *name, umode_t mode, - struct dentry *parent, unsigned long *value); - -Boolean values can be placed in debugfs with: - - struct dentry *debugfs_create_bool(const char *name, umode_t mode, - struct dentry *parent, bool *value); - -A read on the resulting file will yield either Y (for non-zero values) or -N, followed by a newline. If written to, it will accept either upper- or -lower-case values, or 1 or 0. Any other input will be silently ignored. - -Also, atomic_t values can be placed in debugfs with: - - void debugfs_create_atomic_t(const char *name, umode_t mode, - struct dentry *parent, atomic_t *value) - -A read of this file will get atomic_t values, and a write of this file -will set atomic_t values. - -Another option is exporting a block of arbitrary binary data, with -this structure and function: - - struct debugfs_blob_wrapper { - void *data; - unsigned long size; - }; - - struct dentry *debugfs_create_blob(const char *name, umode_t mode, - struct dentry *parent, - struct debugfs_blob_wrapper *blob); - -A read of this file will return the data pointed to by the -debugfs_blob_wrapper structure. Some drivers use "blobs" as a simple way -to return several lines of (static) formatted text output. This function -can be used to export binary information, but there does not appear to be -any code which does so in the mainline. Note that all files created with -debugfs_create_blob() are read-only. - -If you want to dump a block of registers (something that happens quite -often during development, even if little such code reaches mainline. -Debugfs offers two functions: one to make a registers-only file, and -another to insert a register block in the middle of another sequential -file. - - struct debugfs_reg32 { - char *name; - unsigned long offset; - }; - - struct debugfs_regset32 { - struct debugfs_reg32 *regs; - int nregs; - void __iomem *base; - }; - - struct dentry *debugfs_create_regset32(const char *name, umode_t mode, - struct dentry *parent, - struct debugfs_regset32 *regset); - - void debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs, - int nregs, void __iomem *base, char *prefix); - -The "base" argument may be 0, but you may want to build the reg32 array -using __stringify, and a number of register names (macros) are actually -byte offsets over a base for the register block. - -If you want to dump an u32 array in debugfs, you can create file with: - - void debugfs_create_u32_array(const char *name, umode_t mode, - struct dentry *parent, - u32 *array, u32 elements); - -The "array" argument provides data, and the "elements" argument is -the number of elements in the array. Note: Once array is created its -size can not be changed. - -There is a helper function to create device related seq_file: - - struct dentry *debugfs_create_devm_seqfile(struct device *dev, - const char *name, - struct dentry *parent, - int (*read_fn)(struct seq_file *s, - void *data)); - -The "dev" argument is the device related to this debugfs file, and -the "read_fn" is a function pointer which to be called to print the -seq_file content. - -There are a couple of other directory-oriented helper functions: - - struct dentry *debugfs_rename(struct dentry *old_dir, - struct dentry *old_dentry, - struct dentry *new_dir, - const char *new_name); - - struct dentry *debugfs_create_symlink(const char *name, - struct dentry *parent, - const char *target); - -A call to debugfs_rename() will give a new name to an existing debugfs -file, possibly in a different directory. The new_name must not exist prior -to the call; the return value is old_dentry with updated information. -Symbolic links can be created with debugfs_create_symlink(). - -There is one important thing that all debugfs users must take into account: -there is no automatic cleanup of any directories created in debugfs. If a -module is unloaded without explicitly removing debugfs entries, the result -will be a lot of stale pointers and no end of highly antisocial behavior. -So all debugfs users - at least those which can be built as modules - must -be prepared to remove all files and directories they create there. A file -can be removed with: - - void debugfs_remove(struct dentry *dentry); - -The dentry value can be NULL or an error value, in which case nothing will -be removed. - -Once upon a time, debugfs users were required to remember the dentry -pointer for every debugfs file they created so that all files could be -cleaned up. We live in more civilized times now, though, and debugfs users -can call: - - void debugfs_remove_recursive(struct dentry *dentry); - -If this function is passed a pointer for the dentry corresponding to the -top-level directory, the entire hierarchy below that directory will be -removed. - -Notes: - [1] http://lwn.net/Articles/309298/ diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 8fe848ea04af..ab3b656bbe60 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -57,6 +57,7 @@ Documentation for filesystem implementations. btrfs ceph cramfs + debugfs fuse overlayfs virtiofs -- cgit v1.2.3 From 14a19fa5cf759ea18bc7d692cd8fe326af3c4d0a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:58 +0100 Subject: docs: filesystems: convert dlmfs.txt to ReST - Add a SPDX header; - Use copyright symbol; - Adjust document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/efc9e59925723e17d1a4741b11049616c221463e.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/dlmfs.rst | 140 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/dlmfs.txt | 130 --------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 141 insertions(+), 130 deletions(-) create mode 100644 Documentation/filesystems/dlmfs.rst delete mode 100644 Documentation/filesystems/dlmfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/dlmfs.rst b/Documentation/filesystems/dlmfs.rst new file mode 100644 index 000000000000..68daaa7facf9 --- /dev/null +++ b/Documentation/filesystems/dlmfs.rst @@ -0,0 +1,140 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. include:: + +===== +DLMFS +===== + +A minimal DLM userspace interface implemented via a virtual file +system. + +dlmfs is built with OCFS2 as it requires most of its infrastructure. + +:Project web page: http://ocfs2.wiki.kernel.org +:Tools web page: https://github.com/markfasheh/ocfs2-tools +:OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ + +All code copyright 2005 Oracle except when otherwise noted. + +Credits +======= + +Some code taken from ramfs which is Copyright |copy| 2000 Linus Torvalds +and Transmeta Corp. + +Mark Fasheh + +Caveats +======= +- Right now it only works with the OCFS2 DLM, though support for other + DLM implementations should not be a major issue. + +Mount options +============= +None + +Usage +===== + +If you're just interested in OCFS2, then please see ocfs2.txt. The +rest of this document will be geared towards those who want to use +dlmfs for easy to setup and easy to use clustered locking in +userspace. + +Setup +===== + +dlmfs requires that the OCFS2 cluster infrastructure be in +place. Please download ocfs2-tools from the above url and configure a +cluster. + +You'll want to start heartbeating on a volume which all the nodes in +your lockspace can access. The easiest way to do this is via +ocfs2_hb_ctl (distributed with ocfs2-tools). Right now it requires +that an OCFS2 file system be in place so that it can automatically +find its heartbeat area, though it will eventually support heartbeat +against raw disks. + +Please see the ocfs2_hb_ctl and mkfs.ocfs2 manual pages distributed +with ocfs2-tools. + +Once you're heartbeating, DLM lock 'domains' can be easily created / +destroyed and locks within them accessed. + +Locking +======= + +Users may access dlmfs via standard file system calls, or they can use +'libo2dlm' (distributed with ocfs2-tools) which abstracts the file +system calls and presents a more traditional locking api. + +dlmfs handles lock caching automatically for the user, so a lock +request for an already acquired lock will not generate another DLM +call. Userspace programs are assumed to handle their own local +locking. + +Two levels of locks are supported - Shared Read, and Exclusive. +Also supported is a Trylock operation. + +For information on the libo2dlm interface, please see o2dlm.h, +distributed with ocfs2-tools. + +Lock value blocks can be read and written to a resource via read(2) +and write(2) against the fd obtained via your open(2) call. The +maximum currently supported LVB length is 64 bytes (though that is an +OCFS2 DLM limitation). Through this mechanism, users of dlmfs can share +small amounts of data amongst their nodes. + +mkdir(2) signals dlmfs to join a domain (which will have the same name +as the resulting directory) + +rmdir(2) signals dlmfs to leave the domain + +Locks for a given domain are represented by regular inodes inside the +domain directory. Locking against them is done via the open(2) system +call. + +The open(2) call will not return until your lock has been granted or +an error has occurred, unless it has been instructed to do a trylock +operation. If the lock succeeds, you'll get an fd. + +open(2) with O_CREAT to ensure the resource inode is created - dlmfs does +not automatically create inodes for existing lock resources. + +============ =========================== +Open Flag Lock Request Type +============ =========================== +O_RDONLY Shared Read +O_RDWR Exclusive +============ =========================== + + +============ =========================== +Open Flag Resulting Locking Behavior +============ =========================== +O_NONBLOCK Trylock operation +============ =========================== + +You must provide exactly one of O_RDONLY or O_RDWR. + +If O_NONBLOCK is also provided and the trylock operation was valid but +could not lock the resource then open(2) will return ETXTBUSY. + +close(2) drops the lock associated with your fd. + +Modes passed to mkdir(2) or open(2) are adhered to locally. Chown is +supported locally as well. This means you can use them to restrict +access to the resources via dlmfs on your local node only. + +The resource LVB may be read from the fd in either Shared Read or +Exclusive modes via the read(2) system call. It can be written via +write(2) only when open in Exclusive mode. + +Once written, an LVB will be visible to other nodes who obtain Read +Only or higher level locks on the resource. + +See Also +======== +http://opendlm.sourceforge.net/cvsmirror/opendlm/docs/dlmbook_final.pdf + +For more information on the VMS distributed locking API. diff --git a/Documentation/filesystems/dlmfs.txt b/Documentation/filesystems/dlmfs.txt deleted file mode 100644 index fcf4d509d118..000000000000 --- a/Documentation/filesystems/dlmfs.txt +++ /dev/null @@ -1,130 +0,0 @@ -dlmfs -================== -A minimal DLM userspace interface implemented via a virtual file -system. - -dlmfs is built with OCFS2 as it requires most of its infrastructure. - -Project web page: http://ocfs2.wiki.kernel.org -Tools web page: https://github.com/markfasheh/ocfs2-tools -OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ - -All code copyright 2005 Oracle except when otherwise noted. - -CREDITS -======= - -Some code taken from ramfs which is Copyright (C) 2000 Linus Torvalds -and Transmeta Corp. - -Mark Fasheh - -Caveats -======= -- Right now it only works with the OCFS2 DLM, though support for other - DLM implementations should not be a major issue. - -Mount options -============= -None - -Usage -===== - -If you're just interested in OCFS2, then please see ocfs2.txt. The -rest of this document will be geared towards those who want to use -dlmfs for easy to setup and easy to use clustered locking in -userspace. - -Setup -===== - -dlmfs requires that the OCFS2 cluster infrastructure be in -place. Please download ocfs2-tools from the above url and configure a -cluster. - -You'll want to start heartbeating on a volume which all the nodes in -your lockspace can access. The easiest way to do this is via -ocfs2_hb_ctl (distributed with ocfs2-tools). Right now it requires -that an OCFS2 file system be in place so that it can automatically -find its heartbeat area, though it will eventually support heartbeat -against raw disks. - -Please see the ocfs2_hb_ctl and mkfs.ocfs2 manual pages distributed -with ocfs2-tools. - -Once you're heartbeating, DLM lock 'domains' can be easily created / -destroyed and locks within them accessed. - -Locking -======= - -Users may access dlmfs via standard file system calls, or they can use -'libo2dlm' (distributed with ocfs2-tools) which abstracts the file -system calls and presents a more traditional locking api. - -dlmfs handles lock caching automatically for the user, so a lock -request for an already acquired lock will not generate another DLM -call. Userspace programs are assumed to handle their own local -locking. - -Two levels of locks are supported - Shared Read, and Exclusive. -Also supported is a Trylock operation. - -For information on the libo2dlm interface, please see o2dlm.h, -distributed with ocfs2-tools. - -Lock value blocks can be read and written to a resource via read(2) -and write(2) against the fd obtained via your open(2) call. The -maximum currently supported LVB length is 64 bytes (though that is an -OCFS2 DLM limitation). Through this mechanism, users of dlmfs can share -small amounts of data amongst their nodes. - -mkdir(2) signals dlmfs to join a domain (which will have the same name -as the resulting directory) - -rmdir(2) signals dlmfs to leave the domain - -Locks for a given domain are represented by regular inodes inside the -domain directory. Locking against them is done via the open(2) system -call. - -The open(2) call will not return until your lock has been granted or -an error has occurred, unless it has been instructed to do a trylock -operation. If the lock succeeds, you'll get an fd. - -open(2) with O_CREAT to ensure the resource inode is created - dlmfs does -not automatically create inodes for existing lock resources. - -Open Flag Lock Request Type ---------- ----------------- -O_RDONLY Shared Read -O_RDWR Exclusive - -Open Flag Resulting Locking Behavior ---------- -------------------------- -O_NONBLOCK Trylock operation - -You must provide exactly one of O_RDONLY or O_RDWR. - -If O_NONBLOCK is also provided and the trylock operation was valid but -could not lock the resource then open(2) will return ETXTBUSY. - -close(2) drops the lock associated with your fd. - -Modes passed to mkdir(2) or open(2) are adhered to locally. Chown is -supported locally as well. This means you can use them to restrict -access to the resources via dlmfs on your local node only. - -The resource LVB may be read from the fd in either Shared Read or -Exclusive modes via the read(2) system call. It can be written via -write(2) only when open in Exclusive mode. - -Once written, an LVB will be visible to other nodes who obtain Read -Only or higher level locks on the resource. - -See Also -======== -http://opendlm.sourceforge.net/cvsmirror/opendlm/docs/dlmbook_final.pdf - -For more information on the VMS distributed locking API. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index ab3b656bbe60..c6885c7ef781 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -58,6 +58,7 @@ Documentation for filesystem implementations. ceph cramfs debugfs + dlmfs fuse overlayfs virtiofs -- cgit v1.2.3 From b02a17cb8ae23479c9bf306e96d2dd71422de63f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:11:59 +0100 Subject: docs: filesystems: convert ecryptfs.txt to ReST - Add a SPDX header; - Add a document title; - use :field: markup; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: Tyler Hicks Link: https://lore.kernel.org/r/6e13841ebd00c8d988027115c75c58821bb41a0c.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/ecryptfs.rst | 87 ++++++++++++++++++++++++++++++++++ Documentation/filesystems/ecryptfs.txt | 77 ------------------------------ Documentation/filesystems/index.rst | 1 + 3 files changed, 88 insertions(+), 77 deletions(-) create mode 100644 Documentation/filesystems/ecryptfs.rst delete mode 100644 Documentation/filesystems/ecryptfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/ecryptfs.rst b/Documentation/filesystems/ecryptfs.rst new file mode 100644 index 000000000000..7236172300ef --- /dev/null +++ b/Documentation/filesystems/ecryptfs.rst @@ -0,0 +1,87 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====================================================== +eCryptfs: A stacked cryptographic filesystem for Linux +====================================================== + +eCryptfs is free software. Please see the file COPYING for details. +For documentation, please see the files in the doc/ subdirectory. For +building and installation instructions please see the INSTALL file. + +:Maintainer: Phillip Hellewell +:Lead developer: Michael A. Halcrow +:Developers: Michael C. Thompson + Kent Yoder +:Web Site: http://ecryptfs.sf.net + +This software is currently undergoing development. Make sure to +maintain a backup copy of any data you write into eCryptfs. + +eCryptfs requires the userspace tools downloadable from the +SourceForge site: + +http://sourceforge.net/projects/ecryptfs/ + +Userspace requirements include: + +- David Howells' userspace keyring headers and libraries (version + 1.0 or higher), obtainable from + http://people.redhat.com/~dhowells/keyutils/ +- Libgcrypt + + +Notes +===== + +In the beta/experimental releases of eCryptfs, when you upgrade +eCryptfs, you should copy the files to an unencrypted location and +then copy the files back into the new eCryptfs mount to migrate the +files. + + +Mount-wide Passphrase +===================== + +Create a new directory into which eCryptfs will write its encrypted +files (i.e., /root/crypt). Then, create the mount point directory +(i.e., /mnt/crypt). Now it's time to mount eCryptfs:: + + mount -t ecryptfs /root/crypt /mnt/crypt + +You should be prompted for a passphrase and a salt (the salt may be +blank). + +Try writing a new file:: + + echo "Hello, World" > /mnt/crypt/hello.txt + +The operation will complete. Notice that there is a new file in +/root/crypt that is at least 12288 bytes in size (depending on your +host page size). This is the encrypted underlying file for what you +just wrote. To test reading, from start to finish, you need to clear +the user session keyring: + +keyctl clear @u + +Then umount /mnt/crypt and mount again per the instructions given +above. + +:: + + cat /mnt/crypt/hello.txt + + +Notes +===== + +eCryptfs version 0.1 should only be mounted on (1) empty directories +or (2) directories containing files only created by eCryptfs. If you +mount a directory that has pre-existing files not created by eCryptfs, +then behavior is undefined. Do not run eCryptfs in higher verbosity +levels unless you are doing so for the sole purpose of debugging or +development, since secret values will be written out to the system log +in that case. + + +Mike Halcrow +mhalcrow@us.ibm.com diff --git a/Documentation/filesystems/ecryptfs.txt b/Documentation/filesystems/ecryptfs.txt deleted file mode 100644 index 01d8a08351ac..000000000000 --- a/Documentation/filesystems/ecryptfs.txt +++ /dev/null @@ -1,77 +0,0 @@ -eCryptfs: A stacked cryptographic filesystem for Linux - -eCryptfs is free software. Please see the file COPYING for details. -For documentation, please see the files in the doc/ subdirectory. For -building and installation instructions please see the INSTALL file. - -Maintainer: Phillip Hellewell -Lead developer: Michael A. Halcrow -Developers: Michael C. Thompson - Kent Yoder -Web Site: http://ecryptfs.sf.net - -This software is currently undergoing development. Make sure to -maintain a backup copy of any data you write into eCryptfs. - -eCryptfs requires the userspace tools downloadable from the -SourceForge site: - -http://sourceforge.net/projects/ecryptfs/ - -Userspace requirements include: - - David Howells' userspace keyring headers and libraries (version - 1.0 or higher), obtainable from - http://people.redhat.com/~dhowells/keyutils/ - - Libgcrypt - - -NOTES - -In the beta/experimental releases of eCryptfs, when you upgrade -eCryptfs, you should copy the files to an unencrypted location and -then copy the files back into the new eCryptfs mount to migrate the -files. - - -MOUNT-WIDE PASSPHRASE - -Create a new directory into which eCryptfs will write its encrypted -files (i.e., /root/crypt). Then, create the mount point directory -(i.e., /mnt/crypt). Now it's time to mount eCryptfs: - -mount -t ecryptfs /root/crypt /mnt/crypt - -You should be prompted for a passphrase and a salt (the salt may be -blank). - -Try writing a new file: - -echo "Hello, World" > /mnt/crypt/hello.txt - -The operation will complete. Notice that there is a new file in -/root/crypt that is at least 12288 bytes in size (depending on your -host page size). This is the encrypted underlying file for what you -just wrote. To test reading, from start to finish, you need to clear -the user session keyring: - -keyctl clear @u - -Then umount /mnt/crypt and mount again per the instructions given -above. - -cat /mnt/crypt/hello.txt - - -NOTES - -eCryptfs version 0.1 should only be mounted on (1) empty directories -or (2) directories containing files only created by eCryptfs. If you -mount a directory that has pre-existing files not created by eCryptfs, -then behavior is undefined. Do not run eCryptfs in higher verbosity -levels unless you are doing so for the sole purpose of debugging or -development, since secret values will be written out to the system log -in that case. - - -Mike Halcrow -mhalcrow@us.ibm.com diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index c6885c7ef781..d6d69f1c9287 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -59,6 +59,7 @@ Documentation for filesystem implementations. cramfs debugfs dlmfs + ecryptfs fuse overlayfs virtiofs -- cgit v1.2.3 From 06dedb45b79c6550b878244879f33b6e614126bd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:00 +0100 Subject: docs: filesystems: convert efivarfs.txt to ReST Trivial changes: - Add a SPDX header; - Adjust document title; - Mark a literal block as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/215691d747055c4ccb038ec7d78d8d1fe87fe2c0.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/efivarfs.rst | 26 ++++++++++++++++++++++++++ Documentation/filesystems/efivarfs.txt | 23 ----------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 27 insertions(+), 23 deletions(-) create mode 100644 Documentation/filesystems/efivarfs.rst delete mode 100644 Documentation/filesystems/efivarfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/efivarfs.rst b/Documentation/filesystems/efivarfs.rst new file mode 100644 index 000000000000..90ac65683e7e --- /dev/null +++ b/Documentation/filesystems/efivarfs.rst @@ -0,0 +1,26 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================= +efivarfs - a (U)EFI variable filesystem +======================================= + +The efivarfs filesystem was created to address the shortcomings of +using entries in sysfs to maintain EFI variables. The old sysfs EFI +variables code only supported variables of up to 1024 bytes. This +limitation existed in version 0.99 of the EFI specification, but was +removed before any full releases. Since variables can now be larger +than a single page, sysfs isn't the best interface for this. + +Variables can be created, deleted and modified with the efivarfs +filesystem. + +efivarfs is typically mounted like this:: + + mount -t efivarfs none /sys/firmware/efi/efivars + +Due to the presence of numerous firmware bugs where removing non-standard +UEFI variables causes the system firmware to fail to POST, efivarfs +files that are not well-known standardized variables are created +as immutable files. This doesn't prevent removal - "chattr -i" will work - +but it does prevent this kind of failure from being accomplished +accidentally. diff --git a/Documentation/filesystems/efivarfs.txt b/Documentation/filesystems/efivarfs.txt deleted file mode 100644 index 686a64bba775..000000000000 --- a/Documentation/filesystems/efivarfs.txt +++ /dev/null @@ -1,23 +0,0 @@ - -efivarfs - a (U)EFI variable filesystem - -The efivarfs filesystem was created to address the shortcomings of -using entries in sysfs to maintain EFI variables. The old sysfs EFI -variables code only supported variables of up to 1024 bytes. This -limitation existed in version 0.99 of the EFI specification, but was -removed before any full releases. Since variables can now be larger -than a single page, sysfs isn't the best interface for this. - -Variables can be created, deleted and modified with the efivarfs -filesystem. - -efivarfs is typically mounted like this, - - mount -t efivarfs none /sys/firmware/efi/efivars - -Due to the presence of numerous firmware bugs where removing non-standard -UEFI variables causes the system firmware to fail to POST, efivarfs -files that are not well-known standardized variables are created -as immutable files. This doesn't prevent removal - "chattr -i" will work - -but it does prevent this kind of failure from being accomplished -accidentally. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index d6d69f1c9287..4230f49d2732 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -60,6 +60,7 @@ Documentation for filesystem implementations. debugfs dlmfs ecryptfs + efivarfs fuse overlayfs virtiofs -- cgit v1.2.3 From e66d8631ddb3306bd9f463324c2d9a5d9dc559f7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:01 +0100 Subject: docs: filesystems: convert erofs.txt to ReST - Add a SPDX header; - Add a document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add lists markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/402d1d2f7252b8a683f7a9c6867bc5428da64026.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/erofs.rst | 240 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/erofs.txt | 211 ------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 241 insertions(+), 211 deletions(-) create mode 100644 Documentation/filesystems/erofs.rst delete mode 100644 Documentation/filesystems/erofs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst new file mode 100644 index 000000000000..bf145171c2bf --- /dev/null +++ b/Documentation/filesystems/erofs.rst @@ -0,0 +1,240 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====================================== +Enhanced Read-Only File System - EROFS +====================================== + +Overview +======== + +EROFS file-system stands for Enhanced Read-Only File System. Different +from other read-only file systems, it aims to be designed for flexibility, +scalability, but be kept simple and high performance. + +It is designed as a better filesystem solution for the following scenarios: + + - read-only storage media or + + - part of a fully trusted read-only solution, which means it needs to be + immutable and bit-for-bit identical to the official golden image for + their releases due to security and other considerations and + + - hope to save some extra storage space with guaranteed end-to-end performance + by using reduced metadata and transparent file compression, especially + for those embedded devices with limited memory (ex, smartphone); + +Here is the main features of EROFS: + + - Little endian on-disk design; + + - Currently 4KB block size (nobh) and therefore maximum 16TB address space; + + - Metadata & data could be mixed by design; + + - 2 inode versions for different requirements: + + ===================== ============ ===================================== + compact (v1) extended (v2) + ===================== ============ ===================================== + Inode metadata size 32 bytes 64 bytes + Max file size 4 GB 16 EB (also limited by max. vol size) + Max uids/gids 65536 4294967296 + File change time no yes (64 + 32-bit timestamp) + Max hardlinks 65536 4294967296 + Metadata reserved 4 bytes 14 bytes + ===================== ============ ===================================== + + - Support extended attributes (xattrs) as an option; + + - Support xattr inline and tail-end data inline for all files; + + - Support POSIX.1e ACLs by using xattrs; + + - Support transparent file compression as an option: + LZ4 algorithm with 4 KB fixed-sized output compression for high performance. + +The following git tree provides the file system user-space tools under +development (ex, formatting tool mkfs.erofs): + +- git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git + +Bugs and patches are welcome, please kindly help us and send to the following +linux-erofs mailing list: + +- linux-erofs mailing list + +Mount options +============= + +=================== ========================================================= +(no)user_xattr Setup Extended User Attributes. Note: xattr is enabled + by default if CONFIG_EROFS_FS_XATTR is selected. +(no)acl Setup POSIX Access Control List. Note: acl is enabled + by default if CONFIG_EROFS_FS_POSIX_ACL is selected. +cache_strategy=%s Select a strategy for cached decompression from now on: + + ========== ============================================= + disabled In-place I/O decompression only; + readahead Cache the last incomplete compressed physical + cluster for further reading. It still does + in-place I/O decompression for the rest + compressed physical clusters; + readaround Cache the both ends of incomplete compressed + physical clusters for further reading. + It still does in-place I/O decompression + for the rest compressed physical clusters. + ========== ============================================= +=================== ========================================================= + +On-disk details +=============== + +Summary +------- +Different from other read-only file systems, an EROFS volume is designed +to be as simple as possible:: + + |-> aligned with the block size + ____________________________________________________________ + | |SB| | ... | Metadata | ... | Data | Metadata | ... | Data | + |_|__|_|_____|__________|_____|______|__________|_____|______| + 0 +1K + +All data areas should be aligned with the block size, but metadata areas +may not. All metadatas can be now observed in two different spaces (views): + + 1. Inode metadata space + + Each valid inode should be aligned with an inode slot, which is a fixed + value (32 bytes) and designed to be kept in line with compact inode size. + + Each inode can be directly found with the following formula: + inode offset = meta_blkaddr * block_size + 32 * nid + + :: + + |-> aligned with 8B + |-> followed closely + + meta_blkaddr blocks |-> another slot + _____________________________________________________________________ + | ... | inode | xattrs | extents | data inline | ... | inode ... + |________|_______|(optional)|(optional)|__(optional)_|_____|__________ + |-> aligned with the inode slot size + . . + . . + . . + . . + . . + . . + .____________________________________________________|-> aligned with 4B + | xattr_ibody_header | shared xattrs | inline xattrs | + |____________________|_______________|_______________| + |-> 12 bytes <-|->x * 4 bytes<-| . + . . . + . . . + . . . + ._______________________________.______________________. + | id | id | id | id | ... | id | ent | ... | ent| ... | + |____|____|____|____|______|____|_____|_____|____|_____| + |-> aligned with 4B + |-> aligned with 4B + + Inode could be 32 or 64 bytes, which can be distinguished from a common + field which all inode versions have -- i_format:: + + __________________ __________________ + | i_format | | i_format | + |__________________| |__________________| + | ... | | ... | + | | | | + |__________________| 32 bytes | | + | | + |__________________| 64 bytes + + Xattrs, extents, data inline are followed by the corresponding inode with + proper alignment, and they could be optional for different data mappings. + _currently_ total 4 valid data mappings are supported: + + == ==================================================================== + 0 flat file data without data inline (no extent); + 1 fixed-sized output data compression (with non-compacted indexes); + 2 flat file data with tail packing data inline (no extent); + 3 fixed-sized output data compression (with compacted indexes, v5.3+). + == ==================================================================== + + The size of the optional xattrs is indicated by i_xattr_count in inode + header. Large xattrs or xattrs shared by many different files can be + stored in shared xattrs metadata rather than inlined right after inode. + + 2. Shared xattrs metadata space + + Shared xattrs space is similar to the above inode space, started with + a specific block indicated by xattr_blkaddr, organized one by one with + proper align. + + Each share xattr can also be directly found by the following formula: + xattr offset = xattr_blkaddr * block_size + 4 * xattr_id + + :: + + |-> aligned by 4 bytes + + xattr_blkaddr blocks |-> aligned with 4 bytes + _________________________________________________________________________ + | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ... + |________|_____________|_____________|_____|______________|_______________ + +Directories +----------- +All directories are now organized in a compact on-disk format. Note that +each directory block is divided into index and name areas in order to support +random file lookup, and all directory entries are _strictly_ recorded in +alphabetical order in order to support improved prefix binary search +algorithm (could refer to the related source code). + +:: + + ___________________________ + / | + / ______________|________________ + / / | nameoff1 | nameoffN-1 + ____________.______________._______________v________________v__________ + | dirent | dirent | ... | dirent | filename | filename | ... | filename | + |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____| + \ ^ + \ | * could have + \ | trailing '\0' + \________________________| nameoff0 + + Directory block + +Note that apart from the offset of the first filename, nameoff0 also indicates +the total number of directory entries in this block since it is no need to +introduce another on-disk field at all. + +Compression +----------- +Currently, EROFS supports 4KB fixed-sized output transparent file compression, +as illustrated below:: + + |---- Variant-Length Extent ----|-------- VLE --------|----- VLE ----- + clusterofs clusterofs clusterofs + | | | logical data + _________v_______________________________v_____________________v_______________ + ... | . | | . | | . | ... + ____|____.________|_____________|________.____|_____________|__.__________|____ + |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-| + size size size size size + . . . . + . . . . + . . . . + _______._____________._____________._____________._____________________ + ... | | | | ... physical data + _______|_____________|_____________|_____________|_____________________ + |-> cluster <-|-> cluster <-|-> cluster <-| + size size size + +Currently each on-disk physical cluster can contain 4KB (un)compressed data +at most. For each logical cluster, there is a corresponding on-disk index to +describe its cluster type, physical cluster address, etc. + +See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details. diff --git a/Documentation/filesystems/erofs.txt b/Documentation/filesystems/erofs.txt deleted file mode 100644 index db6d39c3ae71..000000000000 --- a/Documentation/filesystems/erofs.txt +++ /dev/null @@ -1,211 +0,0 @@ -Overview -======== - -EROFS file-system stands for Enhanced Read-Only File System. Different -from other read-only file systems, it aims to be designed for flexibility, -scalability, but be kept simple and high performance. - -It is designed as a better filesystem solution for the following scenarios: - - read-only storage media or - - - part of a fully trusted read-only solution, which means it needs to be - immutable and bit-for-bit identical to the official golden image for - their releases due to security and other considerations and - - - hope to save some extra storage space with guaranteed end-to-end performance - by using reduced metadata and transparent file compression, especially - for those embedded devices with limited memory (ex, smartphone); - -Here is the main features of EROFS: - - Little endian on-disk design; - - - Currently 4KB block size (nobh) and therefore maximum 16TB address space; - - - Metadata & data could be mixed by design; - - - 2 inode versions for different requirements: - compact (v1) extended (v2) - Inode metadata size: 32 bytes 64 bytes - Max file size: 4 GB 16 EB (also limited by max. vol size) - Max uids/gids: 65536 4294967296 - File change time: no yes (64 + 32-bit timestamp) - Max hardlinks: 65536 4294967296 - Metadata reserved: 4 bytes 14 bytes - - - Support extended attributes (xattrs) as an option; - - - Support xattr inline and tail-end data inline for all files; - - - Support POSIX.1e ACLs by using xattrs; - - - Support transparent file compression as an option: - LZ4 algorithm with 4 KB fixed-sized output compression for high performance. - -The following git tree provides the file system user-space tools under -development (ex, formatting tool mkfs.erofs): ->> git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git - -Bugs and patches are welcome, please kindly help us and send to the following -linux-erofs mailing list: ->> linux-erofs mailing list - -Mount options -============= - -(no)user_xattr Setup Extended User Attributes. Note: xattr is enabled - by default if CONFIG_EROFS_FS_XATTR is selected. -(no)acl Setup POSIX Access Control List. Note: acl is enabled - by default if CONFIG_EROFS_FS_POSIX_ACL is selected. -cache_strategy=%s Select a strategy for cached decompression from now on: - disabled: In-place I/O decompression only; - readahead: Cache the last incomplete compressed physical - cluster for further reading. It still does - in-place I/O decompression for the rest - compressed physical clusters; - readaround: Cache the both ends of incomplete compressed - physical clusters for further reading. - It still does in-place I/O decompression - for the rest compressed physical clusters. - -On-disk details -=============== - -Summary -------- -Different from other read-only file systems, an EROFS volume is designed -to be as simple as possible: - - |-> aligned with the block size - ____________________________________________________________ - | |SB| | ... | Metadata | ... | Data | Metadata | ... | Data | - |_|__|_|_____|__________|_____|______|__________|_____|______| - 0 +1K - -All data areas should be aligned with the block size, but metadata areas -may not. All metadatas can be now observed in two different spaces (views): - 1. Inode metadata space - Each valid inode should be aligned with an inode slot, which is a fixed - value (32 bytes) and designed to be kept in line with compact inode size. - - Each inode can be directly found with the following formula: - inode offset = meta_blkaddr * block_size + 32 * nid - - |-> aligned with 8B - |-> followed closely - + meta_blkaddr blocks |-> another slot - _____________________________________________________________________ - | ... | inode | xattrs | extents | data inline | ... | inode ... - |________|_______|(optional)|(optional)|__(optional)_|_____|__________ - |-> aligned with the inode slot size - . . - . . - . . - . . - . . - . . - .____________________________________________________|-> aligned with 4B - | xattr_ibody_header | shared xattrs | inline xattrs | - |____________________|_______________|_______________| - |-> 12 bytes <-|->x * 4 bytes<-| . - . . . - . . . - . . . - ._______________________________.______________________. - | id | id | id | id | ... | id | ent | ... | ent| ... | - |____|____|____|____|______|____|_____|_____|____|_____| - |-> aligned with 4B - |-> aligned with 4B - - Inode could be 32 or 64 bytes, which can be distinguished from a common - field which all inode versions have -- i_format: - - __________________ __________________ - | i_format | | i_format | - |__________________| |__________________| - | ... | | ... | - | | | | - |__________________| 32 bytes | | - | | - |__________________| 64 bytes - - Xattrs, extents, data inline are followed by the corresponding inode with - proper alignment, and they could be optional for different data mappings. - _currently_ total 4 valid data mappings are supported: - - 0 flat file data without data inline (no extent); - 1 fixed-sized output data compression (with non-compacted indexes); - 2 flat file data with tail packing data inline (no extent); - 3 fixed-sized output data compression (with compacted indexes, v5.3+). - - The size of the optional xattrs is indicated by i_xattr_count in inode - header. Large xattrs or xattrs shared by many different files can be - stored in shared xattrs metadata rather than inlined right after inode. - - 2. Shared xattrs metadata space - Shared xattrs space is similar to the above inode space, started with - a specific block indicated by xattr_blkaddr, organized one by one with - proper align. - - Each share xattr can also be directly found by the following formula: - xattr offset = xattr_blkaddr * block_size + 4 * xattr_id - - |-> aligned by 4 bytes - + xattr_blkaddr blocks |-> aligned with 4 bytes - _________________________________________________________________________ - | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ... - |________|_____________|_____________|_____|______________|_______________ - -Directories ------------ -All directories are now organized in a compact on-disk format. Note that -each directory block is divided into index and name areas in order to support -random file lookup, and all directory entries are _strictly_ recorded in -alphabetical order in order to support improved prefix binary search -algorithm (could refer to the related source code). - - ___________________________ - / | - / ______________|________________ - / / | nameoff1 | nameoffN-1 - ____________.______________._______________v________________v__________ -| dirent | dirent | ... | dirent | filename | filename | ... | filename | -|___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____| - \ ^ - \ | * could have - \ | trailing '\0' - \________________________| nameoff0 - - Directory block - -Note that apart from the offset of the first filename, nameoff0 also indicates -the total number of directory entries in this block since it is no need to -introduce another on-disk field at all. - -Compression ------------ -Currently, EROFS supports 4KB fixed-sized output transparent file compression, -as illustrated below: - - |---- Variant-Length Extent ----|-------- VLE --------|----- VLE ----- - clusterofs clusterofs clusterofs - | | | logical data -_________v_______________________________v_____________________v_______________ -... | . | | . | | . | ... -____|____.________|_____________|________.____|_____________|__.__________|____ - |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-| - size size size size size - . . . . - . . . . - . . . . - _______._____________._____________._____________._____________________ - ... | | | | ... physical data - _______|_____________|_____________|_____________|_____________________ - |-> cluster <-|-> cluster <-|-> cluster <-| - size size size - -Currently each on-disk physical cluster can contain 4KB (un)compressed data -at most. For each logical cluster, there is a corresponding on-disk index to -describe its cluster type, physical cluster address, etc. - -See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details. - diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 4230f49d2732..03a493b27920 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -61,6 +61,7 @@ Documentation for filesystem implementations. dlmfs ecryptfs efivarfs + erofs fuse overlayfs virtiofs -- cgit v1.2.3 From 6e29ad2ea34f63f2b959807370672af569861378 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:02 +0100 Subject: docs: filesystems: convert ext2.txt to ReST - Add a SPDX header; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Use footnoote markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: Jan Kara Link: https://lore.kernel.org/r/fde6721f0303259d830391e351dbde48f67f3ec7.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/ext2.rst | 399 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/ext2.txt | 388 ----------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 400 insertions(+), 388 deletions(-) create mode 100644 Documentation/filesystems/ext2.rst delete mode 100644 Documentation/filesystems/ext2.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/ext2.rst b/Documentation/filesystems/ext2.rst new file mode 100644 index 000000000000..d83dbbb162e2 --- /dev/null +++ b/Documentation/filesystems/ext2.rst @@ -0,0 +1,399 @@ +.. SPDX-License-Identifier: GPL-2.0 + + +The Second Extended Filesystem +============================== + +ext2 was originally released in January 1993. Written by R\'emy Card, +Theodore Ts'o and Stephen Tweedie, it was a major rewrite of the +Extended Filesystem. It is currently still (April 2001) the predominant +filesystem in use by Linux. There are also implementations available +for NetBSD, FreeBSD, the GNU HURD, Windows 95/98/NT, OS/2 and RISC OS. + +Options +======= + +Most defaults are determined by the filesystem superblock, and can be +set using tune2fs(8). Kernel-determined defaults are indicated by (*). + +==================== === ================================================ +bsddf (*) Makes ``df`` act like BSD. +minixdf Makes ``df`` act like Minix. + +check=none, nocheck (*) Don't do extra checking of bitmaps on mount + (check=normal and check=strict options removed) + +dax Use direct access (no page cache). See + Documentation/filesystems/dax.txt. + +debug Extra debugging information is sent to the + kernel syslog. Useful for developers. + +errors=continue Keep going on a filesystem error. +errors=remount-ro Remount the filesystem read-only on an error. +errors=panic Panic and halt the machine if an error occurs. + +grpid, bsdgroups Give objects the same group ID as their parent. +nogrpid, sysvgroups New objects have the group ID of their creator. + +nouid32 Use 16-bit UIDs and GIDs. + +oldalloc Enable the old block allocator. Orlov should + have better performance, we'd like to get some + feedback if it's the contrary for you. +orlov (*) Use the Orlov block allocator. + (See http://lwn.net/Articles/14633/ and + http://lwn.net/Articles/14446/.) + +resuid=n The user ID which may use the reserved blocks. +resgid=n The group ID which may use the reserved blocks. + +sb=n Use alternate superblock at this location. + +user_xattr Enable "user." POSIX Extended Attributes + (requires CONFIG_EXT2_FS_XATTR). +nouser_xattr Don't support "user." extended attributes. + +acl Enable POSIX Access Control Lists support + (requires CONFIG_EXT2_FS_POSIX_ACL). +noacl Don't support POSIX ACLs. + +nobh Do not attach buffer_heads to file pagecache. + +quota, usrquota Enable user disk quota support + (requires CONFIG_QUOTA). + +grpquota Enable group disk quota support + (requires CONFIG_QUOTA). +==================== === ================================================ + +noquota option ls silently ignored by ext2. + + +Specification +============= + +ext2 shares many properties with traditional Unix filesystems. It has +the concepts of blocks, inodes and directories. It has space in the +specification for Access Control Lists (ACLs), fragments, undeletion and +compression though these are not yet implemented (some are available as +separate patches). There is also a versioning mechanism to allow new +features (such as journalling) to be added in a maximally compatible +manner. + +Blocks +------ + +The space in the device or file is split up into blocks. These are +a fixed size, of 1024, 2048 or 4096 bytes (8192 bytes on Alpha systems), +which is decided when the filesystem is created. Smaller blocks mean +less wasted space per file, but require slightly more accounting overhead, +and also impose other limits on the size of files and the filesystem. + +Block Groups +------------ + +Blocks are clustered into block groups in order to reduce fragmentation +and minimise the amount of head seeking when reading a large amount +of consecutive data. Information about each block group is kept in a +descriptor table stored in the block(s) immediately after the superblock. +Two blocks near the start of each group are reserved for the block usage +bitmap and the inode usage bitmap which show which blocks and inodes +are in use. Since each bitmap is limited to a single block, this means +that the maximum size of a block group is 8 times the size of a block. + +The block(s) following the bitmaps in each block group are designated +as the inode table for that block group and the remainder are the data +blocks. The block allocation algorithm attempts to allocate data blocks +in the same block group as the inode which contains them. + +The Superblock +-------------- + +The superblock contains all the information about the configuration of +the filing system. The primary copy of the superblock is stored at an +offset of 1024 bytes from the start of the device, and it is essential +to mounting the filesystem. Since it is so important, backup copies of +the superblock are stored in block groups throughout the filesystem. +The first version of ext2 (revision 0) stores a copy at the start of +every block group, along with backups of the group descriptor block(s). +Because this can consume a considerable amount of space for large +filesystems, later revisions can optionally reduce the number of backup +copies by only putting backups in specific groups (this is the sparse +superblock feature). The groups chosen are 0, 1 and powers of 3, 5 and 7. + +The information in the superblock contains fields such as the total +number of inodes and blocks in the filesystem and how many are free, +how many inodes and blocks are in each block group, when the filesystem +was mounted (and if it was cleanly unmounted), when it was modified, +what version of the filesystem it is (see the Revisions section below) +and which OS created it. + +If the filesystem is revision 1 or higher, then there are extra fields, +such as a volume name, a unique identification number, the inode size, +and space for optional filesystem features to store configuration info. + +All fields in the superblock (as in all other ext2 structures) are stored +on the disc in little endian format, so a filesystem is portable between +machines without having to know what machine it was created on. + +Inodes +------ + +The inode (index node) is a fundamental concept in the ext2 filesystem. +Each object in the filesystem is represented by an inode. The inode +structure contains pointers to the filesystem blocks which contain the +data held in the object and all of the metadata about an object except +its name. The metadata about an object includes the permissions, owner, +group, flags, size, number of blocks used, access time, change time, +modification time, deletion time, number of links, fragments, version +(for NFS) and extended attributes (EAs) and/or Access Control Lists (ACLs). + +There are some reserved fields which are currently unused in the inode +structure and several which are overloaded. One field is reserved for the +directory ACL if the inode is a directory and alternately for the top 32 +bits of the file size if the inode is a regular file (allowing file sizes +larger than 2GB). The translator field is unused under Linux, but is used +by the HURD to reference the inode of a program which will be used to +interpret this object. Most of the remaining reserved fields have been +used up for both Linux and the HURD for larger owner and group fields, +The HURD also has a larger mode field so it uses another of the remaining +fields to store the extra more bits. + +There are pointers to the first 12 blocks which contain the file's data +in the inode. There is a pointer to an indirect block (which contains +pointers to the next set of blocks), a pointer to a doubly-indirect +block (which contains pointers to indirect blocks) and a pointer to a +trebly-indirect block (which contains pointers to doubly-indirect blocks). + +The flags field contains some ext2-specific flags which aren't catered +for by the standard chmod flags. These flags can be listed with lsattr +and changed with the chattr command, and allow specific filesystem +behaviour on a per-file basis. There are flags for secure deletion, +undeletable, compression, synchronous updates, immutability, append-only, +dumpable, no-atime, indexed directories, and data-journaling. Not all +of these are supported yet. + +Directories +----------- + +A directory is a filesystem object and has an inode just like a file. +It is a specially formatted file containing records which associate +each name with an inode number. Later revisions of the filesystem also +encode the type of the object (file, directory, symlink, device, fifo, +socket) to avoid the need to check the inode itself for this information +(support for taking advantage of this feature does not yet exist in +Glibc 2.2). + +The inode allocation code tries to assign inodes which are in the same +block group as the directory in which they are first created. + +The current implementation of ext2 uses a singly-linked list to store +the filenames in the directory; a pending enhancement uses hashing of the +filenames to allow lookup without the need to scan the entire directory. + +The current implementation never removes empty directory blocks once they +have been allocated to hold more files. + +Special files +------------- + +Symbolic links are also filesystem objects with inodes. They deserve +special mention because the data for them is stored within the inode +itself if the symlink is less than 60 bytes long. It uses the fields +which would normally be used to store the pointers to data blocks. +This is a worthwhile optimisation as it we avoid allocating a full +block for the symlink, and most symlinks are less than 60 characters long. + +Character and block special devices never have data blocks assigned to +them. Instead, their device number is stored in the inode, again reusing +the fields which would be used to point to the data blocks. + +Reserved Space +-------------- + +In ext2, there is a mechanism for reserving a certain number of blocks +for a particular user (normally the super-user). This is intended to +allow for the system to continue functioning even if non-privileged users +fill up all the space available to them (this is independent of filesystem +quotas). It also keeps the filesystem from filling up entirely which +helps combat fragmentation. + +Filesystem check +---------------- + +At boot time, most systems run a consistency check (e2fsck) on their +filesystems. The superblock of the ext2 filesystem contains several +fields which indicate whether fsck should actually run (since checking +the filesystem at boot can take a long time if it is large). fsck will +run if the filesystem was not cleanly unmounted, if the maximum mount +count has been exceeded or if the maximum time between checks has been +exceeded. + +Feature Compatibility +--------------------- + +The compatibility feature mechanism used in ext2 is sophisticated. +It safely allows features to be added to the filesystem, without +unnecessarily sacrificing compatibility with older versions of the +filesystem code. The feature compatibility mechanism is not supported by +the original revision 0 (EXT2_GOOD_OLD_REV) of ext2, but was introduced in +revision 1. There are three 32-bit fields, one for compatible features +(COMPAT), one for read-only compatible (RO_COMPAT) features and one for +incompatible (INCOMPAT) features. + +These feature flags have specific meanings for the kernel as follows: + +A COMPAT flag indicates that a feature is present in the filesystem, +but the on-disk format is 100% compatible with older on-disk formats, so +a kernel which didn't know anything about this feature could read/write +the filesystem without any chance of corrupting the filesystem (or even +making it inconsistent). This is essentially just a flag which says +"this filesystem has a (hidden) feature" that the kernel or e2fsck may +want to be aware of (more on e2fsck and feature flags later). The ext3 +HAS_JOURNAL feature is a COMPAT flag because the ext3 journal is simply +a regular file with data blocks in it so the kernel does not need to +take any special notice of it if it doesn't understand ext3 journaling. + +An RO_COMPAT flag indicates that the on-disk format is 100% compatible +with older on-disk formats for reading (i.e. the feature does not change +the visible on-disk format). However, an old kernel writing to such a +filesystem would/could corrupt the filesystem, so this is prevented. The +most common such feature, SPARSE_SUPER, is an RO_COMPAT feature because +sparse groups allow file data blocks where superblock/group descriptor +backups used to live, and ext2_free_blocks() refuses to free these blocks, +which would leading to inconsistent bitmaps. An old kernel would also +get an error if it tried to free a series of blocks which crossed a group +boundary, but this is a legitimate layout in a SPARSE_SUPER filesystem. + +An INCOMPAT flag indicates the on-disk format has changed in some +way that makes it unreadable by older kernels, or would otherwise +cause a problem if an old kernel tried to mount it. FILETYPE is an +INCOMPAT flag because older kernels would think a filename was longer +than 256 characters, which would lead to corrupt directory listings. +The COMPRESSION flag is an obvious INCOMPAT flag - if the kernel +doesn't understand compression, you would just get garbage back from +read() instead of it automatically decompressing your data. The ext3 +RECOVER flag is needed to prevent a kernel which does not understand the +ext3 journal from mounting the filesystem without replaying the journal. + +For e2fsck, it needs to be more strict with the handling of these +flags than the kernel. If it doesn't understand ANY of the COMPAT, +RO_COMPAT, or INCOMPAT flags it will refuse to check the filesystem, +because it has no way of verifying whether a given feature is valid +or not. Allowing e2fsck to succeed on a filesystem with an unknown +feature is a false sense of security for the user. Refusing to check +a filesystem with unknown features is a good incentive for the user to +update to the latest e2fsck. This also means that anyone adding feature +flags to ext2 also needs to update e2fsck to verify these features. + +Metadata +-------- + +It is frequently claimed that the ext2 implementation of writing +asynchronous metadata is faster than the ffs synchronous metadata +scheme but less reliable. Both methods are equally resolvable by their +respective fsck programs. + +If you're exceptionally paranoid, there are 3 ways of making metadata +writes synchronous on ext2: + +- per-file if you have the program source: use the O_SYNC flag to open() +- per-file if you don't have the source: use "chattr +S" on the file +- per-filesystem: add the "sync" option to mount (or in /etc/fstab) + +the first and last are not ext2 specific but do force the metadata to +be written synchronously. See also Journaling below. + +Limitations +----------- + +There are various limits imposed by the on-disk layout of ext2. Other +limits are imposed by the current implementation of the kernel code. +Many of the limits are determined at the time the filesystem is first +created, and depend upon the block size chosen. The ratio of inodes to +data blocks is fixed at filesystem creation time, so the only way to +increase the number of inodes is to increase the size of the filesystem. +No tools currently exist which can change the ratio of inodes to blocks. + +Most of these limits could be overcome with slight changes in the on-disk +format and using a compatibility flag to signal the format change (at +the expense of some compatibility). + +===================== ======= ======= ======= ======== +Filesystem block size 1kB 2kB 4kB 8kB +===================== ======= ======= ======= ======== +File size limit 16GB 256GB 2048GB 2048GB +Filesystem size limit 2047GB 8192GB 16384GB 32768GB +===================== ======= ======= ======= ======== + +There is a 2.4 kernel limit of 2048GB for a single block device, so no +filesystem larger than that can be created at this time. There is also +an upper limit on the block size imposed by the page size of the kernel, +so 8kB blocks are only allowed on Alpha systems (and other architectures +which support larger pages). + +There is an upper limit of 32000 subdirectories in a single directory. + +There is a "soft" upper limit of about 10-15k files in a single directory +with the current linear linked-list directory implementation. This limit +stems from performance problems when creating and deleting (and also +finding) files in such large directories. Using a hashed directory index +(under development) allows 100k-1M+ files in a single directory without +performance problems (although RAM size becomes an issue at this point). + +The (meaningless) absolute upper limit of files in a single directory +(imposed by the file size, the realistic limit is obviously much less) +is over 130 trillion files. It would be higher except there are not +enough 4-character names to make up unique directory entries, so they +have to be 8 character filenames, even then we are fairly close to +running out of unique filenames. + +Journaling +---------- + +A journaling extension to the ext2 code has been developed by Stephen +Tweedie. It avoids the risks of metadata corruption and the need to +wait for e2fsck to complete after a crash, without requiring a change +to the on-disk ext2 layout. In a nutshell, the journal is a regular +file which stores whole metadata (and optionally data) blocks that have +been modified, prior to writing them into the filesystem. This means +it is possible to add a journal to an existing ext2 filesystem without +the need for data conversion. + +When changes to the filesystem (e.g. a file is renamed) they are stored in +a transaction in the journal and can either be complete or incomplete at +the time of a crash. If a transaction is complete at the time of a crash +(or in the normal case where the system does not crash), then any blocks +in that transaction are guaranteed to represent a valid filesystem state, +and are copied into the filesystem. If a transaction is incomplete at +the time of the crash, then there is no guarantee of consistency for +the blocks in that transaction so they are discarded (which means any +filesystem changes they represent are also lost). +Check Documentation/filesystems/ext4/ if you want to read more about +ext4 and journaling. + +References +========== + +======================= =============================================== +The kernel source file:/usr/src/linux/fs/ext2/ +e2fsprogs (e2fsck) http://e2fsprogs.sourceforge.net/ +Design & Implementation http://e2fsprogs.sourceforge.net/ext2intro.html +Journaling (ext3) ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/ +Filesystem Resizing http://ext2resize.sourceforge.net/ +Compression [1]_ http://e2compr.sourceforge.net/ +======================= =============================================== + +Implementations for: + +======================= =========================================================== +Windows 95/98/NT/2000 http://www.chrysocome.net/explore2fs +Windows 95 [1]_ http://www.yipton.net/content.html#FSDEXT2 +DOS client [1]_ ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/ +OS/2 [2]_ ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/ +RISC OS client http://www.esw-heim.tu-clausthal.de/~marco/smorbrod/IscaFS/ +======================= =========================================================== + +.. [1] no longer actively developed/supported (as of Apr 2001) +.. [2] no longer actively developed/supported (as of Mar 2009) diff --git a/Documentation/filesystems/ext2.txt b/Documentation/filesystems/ext2.txt deleted file mode 100644 index 94c2cf0292f5..000000000000 --- a/Documentation/filesystems/ext2.txt +++ /dev/null @@ -1,388 +0,0 @@ - -The Second Extended Filesystem -============================== - -ext2 was originally released in January 1993. Written by R\'emy Card, -Theodore Ts'o and Stephen Tweedie, it was a major rewrite of the -Extended Filesystem. It is currently still (April 2001) the predominant -filesystem in use by Linux. There are also implementations available -for NetBSD, FreeBSD, the GNU HURD, Windows 95/98/NT, OS/2 and RISC OS. - -Options -======= - -Most defaults are determined by the filesystem superblock, and can be -set using tune2fs(8). Kernel-determined defaults are indicated by (*). - -bsddf (*) Makes `df' act like BSD. -minixdf Makes `df' act like Minix. - -check=none, nocheck (*) Don't do extra checking of bitmaps on mount - (check=normal and check=strict options removed) - -dax Use direct access (no page cache). See - Documentation/filesystems/dax.txt. - -debug Extra debugging information is sent to the - kernel syslog. Useful for developers. - -errors=continue Keep going on a filesystem error. -errors=remount-ro Remount the filesystem read-only on an error. -errors=panic Panic and halt the machine if an error occurs. - -grpid, bsdgroups Give objects the same group ID as their parent. -nogrpid, sysvgroups New objects have the group ID of their creator. - -nouid32 Use 16-bit UIDs and GIDs. - -oldalloc Enable the old block allocator. Orlov should - have better performance, we'd like to get some - feedback if it's the contrary for you. -orlov (*) Use the Orlov block allocator. - (See http://lwn.net/Articles/14633/ and - http://lwn.net/Articles/14446/.) - -resuid=n The user ID which may use the reserved blocks. -resgid=n The group ID which may use the reserved blocks. - -sb=n Use alternate superblock at this location. - -user_xattr Enable "user." POSIX Extended Attributes - (requires CONFIG_EXT2_FS_XATTR). -nouser_xattr Don't support "user." extended attributes. - -acl Enable POSIX Access Control Lists support - (requires CONFIG_EXT2_FS_POSIX_ACL). -noacl Don't support POSIX ACLs. - -nobh Do not attach buffer_heads to file pagecache. - -quota, usrquota Enable user disk quota support - (requires CONFIG_QUOTA). - -grpquota Enable group disk quota support - (requires CONFIG_QUOTA). - -noquota option ls silently ignored by ext2. - - -Specification -============= - -ext2 shares many properties with traditional Unix filesystems. It has -the concepts of blocks, inodes and directories. It has space in the -specification for Access Control Lists (ACLs), fragments, undeletion and -compression though these are not yet implemented (some are available as -separate patches). There is also a versioning mechanism to allow new -features (such as journalling) to be added in a maximally compatible -manner. - -Blocks ------- - -The space in the device or file is split up into blocks. These are -a fixed size, of 1024, 2048 or 4096 bytes (8192 bytes on Alpha systems), -which is decided when the filesystem is created. Smaller blocks mean -less wasted space per file, but require slightly more accounting overhead, -and also impose other limits on the size of files and the filesystem. - -Block Groups ------------- - -Blocks are clustered into block groups in order to reduce fragmentation -and minimise the amount of head seeking when reading a large amount -of consecutive data. Information about each block group is kept in a -descriptor table stored in the block(s) immediately after the superblock. -Two blocks near the start of each group are reserved for the block usage -bitmap and the inode usage bitmap which show which blocks and inodes -are in use. Since each bitmap is limited to a single block, this means -that the maximum size of a block group is 8 times the size of a block. - -The block(s) following the bitmaps in each block group are designated -as the inode table for that block group and the remainder are the data -blocks. The block allocation algorithm attempts to allocate data blocks -in the same block group as the inode which contains them. - -The Superblock --------------- - -The superblock contains all the information about the configuration of -the filing system. The primary copy of the superblock is stored at an -offset of 1024 bytes from the start of the device, and it is essential -to mounting the filesystem. Since it is so important, backup copies of -the superblock are stored in block groups throughout the filesystem. -The first version of ext2 (revision 0) stores a copy at the start of -every block group, along with backups of the group descriptor block(s). -Because this can consume a considerable amount of space for large -filesystems, later revisions can optionally reduce the number of backup -copies by only putting backups in specific groups (this is the sparse -superblock feature). The groups chosen are 0, 1 and powers of 3, 5 and 7. - -The information in the superblock contains fields such as the total -number of inodes and blocks in the filesystem and how many are free, -how many inodes and blocks are in each block group, when the filesystem -was mounted (and if it was cleanly unmounted), when it was modified, -what version of the filesystem it is (see the Revisions section below) -and which OS created it. - -If the filesystem is revision 1 or higher, then there are extra fields, -such as a volume name, a unique identification number, the inode size, -and space for optional filesystem features to store configuration info. - -All fields in the superblock (as in all other ext2 structures) are stored -on the disc in little endian format, so a filesystem is portable between -machines without having to know what machine it was created on. - -Inodes ------- - -The inode (index node) is a fundamental concept in the ext2 filesystem. -Each object in the filesystem is represented by an inode. The inode -structure contains pointers to the filesystem blocks which contain the -data held in the object and all of the metadata about an object except -its name. The metadata about an object includes the permissions, owner, -group, flags, size, number of blocks used, access time, change time, -modification time, deletion time, number of links, fragments, version -(for NFS) and extended attributes (EAs) and/or Access Control Lists (ACLs). - -There are some reserved fields which are currently unused in the inode -structure and several which are overloaded. One field is reserved for the -directory ACL if the inode is a directory and alternately for the top 32 -bits of the file size if the inode is a regular file (allowing file sizes -larger than 2GB). The translator field is unused under Linux, but is used -by the HURD to reference the inode of a program which will be used to -interpret this object. Most of the remaining reserved fields have been -used up for both Linux and the HURD for larger owner and group fields, -The HURD also has a larger mode field so it uses another of the remaining -fields to store the extra more bits. - -There are pointers to the first 12 blocks which contain the file's data -in the inode. There is a pointer to an indirect block (which contains -pointers to the next set of blocks), a pointer to a doubly-indirect -block (which contains pointers to indirect blocks) and a pointer to a -trebly-indirect block (which contains pointers to doubly-indirect blocks). - -The flags field contains some ext2-specific flags which aren't catered -for by the standard chmod flags. These flags can be listed with lsattr -and changed with the chattr command, and allow specific filesystem -behaviour on a per-file basis. There are flags for secure deletion, -undeletable, compression, synchronous updates, immutability, append-only, -dumpable, no-atime, indexed directories, and data-journaling. Not all -of these are supported yet. - -Directories ------------ - -A directory is a filesystem object and has an inode just like a file. -It is a specially formatted file containing records which associate -each name with an inode number. Later revisions of the filesystem also -encode the type of the object (file, directory, symlink, device, fifo, -socket) to avoid the need to check the inode itself for this information -(support for taking advantage of this feature does not yet exist in -Glibc 2.2). - -The inode allocation code tries to assign inodes which are in the same -block group as the directory in which they are first created. - -The current implementation of ext2 uses a singly-linked list to store -the filenames in the directory; a pending enhancement uses hashing of the -filenames to allow lookup without the need to scan the entire directory. - -The current implementation never removes empty directory blocks once they -have been allocated to hold more files. - -Special files -------------- - -Symbolic links are also filesystem objects with inodes. They deserve -special mention because the data for them is stored within the inode -itself if the symlink is less than 60 bytes long. It uses the fields -which would normally be used to store the pointers to data blocks. -This is a worthwhile optimisation as it we avoid allocating a full -block for the symlink, and most symlinks are less than 60 characters long. - -Character and block special devices never have data blocks assigned to -them. Instead, their device number is stored in the inode, again reusing -the fields which would be used to point to the data blocks. - -Reserved Space --------------- - -In ext2, there is a mechanism for reserving a certain number of blocks -for a particular user (normally the super-user). This is intended to -allow for the system to continue functioning even if non-privileged users -fill up all the space available to them (this is independent of filesystem -quotas). It also keeps the filesystem from filling up entirely which -helps combat fragmentation. - -Filesystem check ----------------- - -At boot time, most systems run a consistency check (e2fsck) on their -filesystems. The superblock of the ext2 filesystem contains several -fields which indicate whether fsck should actually run (since checking -the filesystem at boot can take a long time if it is large). fsck will -run if the filesystem was not cleanly unmounted, if the maximum mount -count has been exceeded or if the maximum time between checks has been -exceeded. - -Feature Compatibility ---------------------- - -The compatibility feature mechanism used in ext2 is sophisticated. -It safely allows features to be added to the filesystem, without -unnecessarily sacrificing compatibility with older versions of the -filesystem code. The feature compatibility mechanism is not supported by -the original revision 0 (EXT2_GOOD_OLD_REV) of ext2, but was introduced in -revision 1. There are three 32-bit fields, one for compatible features -(COMPAT), one for read-only compatible (RO_COMPAT) features and one for -incompatible (INCOMPAT) features. - -These feature flags have specific meanings for the kernel as follows: - -A COMPAT flag indicates that a feature is present in the filesystem, -but the on-disk format is 100% compatible with older on-disk formats, so -a kernel which didn't know anything about this feature could read/write -the filesystem without any chance of corrupting the filesystem (or even -making it inconsistent). This is essentially just a flag which says -"this filesystem has a (hidden) feature" that the kernel or e2fsck may -want to be aware of (more on e2fsck and feature flags later). The ext3 -HAS_JOURNAL feature is a COMPAT flag because the ext3 journal is simply -a regular file with data blocks in it so the kernel does not need to -take any special notice of it if it doesn't understand ext3 journaling. - -An RO_COMPAT flag indicates that the on-disk format is 100% compatible -with older on-disk formats for reading (i.e. the feature does not change -the visible on-disk format). However, an old kernel writing to such a -filesystem would/could corrupt the filesystem, so this is prevented. The -most common such feature, SPARSE_SUPER, is an RO_COMPAT feature because -sparse groups allow file data blocks where superblock/group descriptor -backups used to live, and ext2_free_blocks() refuses to free these blocks, -which would leading to inconsistent bitmaps. An old kernel would also -get an error if it tried to free a series of blocks which crossed a group -boundary, but this is a legitimate layout in a SPARSE_SUPER filesystem. - -An INCOMPAT flag indicates the on-disk format has changed in some -way that makes it unreadable by older kernels, or would otherwise -cause a problem if an old kernel tried to mount it. FILETYPE is an -INCOMPAT flag because older kernels would think a filename was longer -than 256 characters, which would lead to corrupt directory listings. -The COMPRESSION flag is an obvious INCOMPAT flag - if the kernel -doesn't understand compression, you would just get garbage back from -read() instead of it automatically decompressing your data. The ext3 -RECOVER flag is needed to prevent a kernel which does not understand the -ext3 journal from mounting the filesystem without replaying the journal. - -For e2fsck, it needs to be more strict with the handling of these -flags than the kernel. If it doesn't understand ANY of the COMPAT, -RO_COMPAT, or INCOMPAT flags it will refuse to check the filesystem, -because it has no way of verifying whether a given feature is valid -or not. Allowing e2fsck to succeed on a filesystem with an unknown -feature is a false sense of security for the user. Refusing to check -a filesystem with unknown features is a good incentive for the user to -update to the latest e2fsck. This also means that anyone adding feature -flags to ext2 also needs to update e2fsck to verify these features. - -Metadata --------- - -It is frequently claimed that the ext2 implementation of writing -asynchronous metadata is faster than the ffs synchronous metadata -scheme but less reliable. Both methods are equally resolvable by their -respective fsck programs. - -If you're exceptionally paranoid, there are 3 ways of making metadata -writes synchronous on ext2: - -per-file if you have the program source: use the O_SYNC flag to open() -per-file if you don't have the source: use "chattr +S" on the file -per-filesystem: add the "sync" option to mount (or in /etc/fstab) - -the first and last are not ext2 specific but do force the metadata to -be written synchronously. See also Journaling below. - -Limitations ------------ - -There are various limits imposed by the on-disk layout of ext2. Other -limits are imposed by the current implementation of the kernel code. -Many of the limits are determined at the time the filesystem is first -created, and depend upon the block size chosen. The ratio of inodes to -data blocks is fixed at filesystem creation time, so the only way to -increase the number of inodes is to increase the size of the filesystem. -No tools currently exist which can change the ratio of inodes to blocks. - -Most of these limits could be overcome with slight changes in the on-disk -format and using a compatibility flag to signal the format change (at -the expense of some compatibility). - -Filesystem block size: 1kB 2kB 4kB 8kB - -File size limit: 16GB 256GB 2048GB 2048GB -Filesystem size limit: 2047GB 8192GB 16384GB 32768GB - -There is a 2.4 kernel limit of 2048GB for a single block device, so no -filesystem larger than that can be created at this time. There is also -an upper limit on the block size imposed by the page size of the kernel, -so 8kB blocks are only allowed on Alpha systems (and other architectures -which support larger pages). - -There is an upper limit of 32000 subdirectories in a single directory. - -There is a "soft" upper limit of about 10-15k files in a single directory -with the current linear linked-list directory implementation. This limit -stems from performance problems when creating and deleting (and also -finding) files in such large directories. Using a hashed directory index -(under development) allows 100k-1M+ files in a single directory without -performance problems (although RAM size becomes an issue at this point). - -The (meaningless) absolute upper limit of files in a single directory -(imposed by the file size, the realistic limit is obviously much less) -is over 130 trillion files. It would be higher except there are not -enough 4-character names to make up unique directory entries, so they -have to be 8 character filenames, even then we are fairly close to -running out of unique filenames. - -Journaling ----------- - -A journaling extension to the ext2 code has been developed by Stephen -Tweedie. It avoids the risks of metadata corruption and the need to -wait for e2fsck to complete after a crash, without requiring a change -to the on-disk ext2 layout. In a nutshell, the journal is a regular -file which stores whole metadata (and optionally data) blocks that have -been modified, prior to writing them into the filesystem. This means -it is possible to add a journal to an existing ext2 filesystem without -the need for data conversion. - -When changes to the filesystem (e.g. a file is renamed) they are stored in -a transaction in the journal and can either be complete or incomplete at -the time of a crash. If a transaction is complete at the time of a crash -(or in the normal case where the system does not crash), then any blocks -in that transaction are guaranteed to represent a valid filesystem state, -and are copied into the filesystem. If a transaction is incomplete at -the time of the crash, then there is no guarantee of consistency for -the blocks in that transaction so they are discarded (which means any -filesystem changes they represent are also lost). -Check Documentation/filesystems/ext4/ if you want to read more about -ext4 and journaling. - -References -========== - -The kernel source file:/usr/src/linux/fs/ext2/ -e2fsprogs (e2fsck) http://e2fsprogs.sourceforge.net/ -Design & Implementation http://e2fsprogs.sourceforge.net/ext2intro.html -Journaling (ext3) ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/ -Filesystem Resizing http://ext2resize.sourceforge.net/ -Compression (*) http://e2compr.sourceforge.net/ - -Implementations for: -Windows 95/98/NT/2000 http://www.chrysocome.net/explore2fs -Windows 95 (*) http://www.yipton.net/content.html#FSDEXT2 -DOS client (*) ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/ -OS/2 (+) ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/ -RISC OS client http://www.esw-heim.tu-clausthal.de/~marco/smorbrod/IscaFS/ - -(*) no longer actively developed/supported (as of Apr 2001) -(+) no longer actively developed/supported (as of Mar 2009) diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 03a493b27920..102b3b65486a 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -62,6 +62,7 @@ Documentation for filesystem implementations. ecryptfs efivarfs erofs + ext2 fuse overlayfs virtiofs -- cgit v1.2.3 From 7dc62406320c4103bbdeeeecd0a7ef03e3e58009 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:03 +0100 Subject: docs: filesystems: convert ext3.txt to ReST Nothing really required here. Just renaming would be enough. Yet, while here, lets add a SPDX header and adjust document title to met the same standard we're using on most docs. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/26960235e3e7c972bd543f5dd59f1ef4f3a877c6.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/ext3.rst | 14 ++++++++++++++ Documentation/filesystems/ext3.txt | 12 ------------ Documentation/filesystems/index.rst | 1 + 3 files changed, 15 insertions(+), 12 deletions(-) create mode 100644 Documentation/filesystems/ext3.rst delete mode 100644 Documentation/filesystems/ext3.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/ext3.rst b/Documentation/filesystems/ext3.rst new file mode 100644 index 000000000000..c06cec3a8fdc --- /dev/null +++ b/Documentation/filesystems/ext3.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +Ext3 Filesystem +=============== + +Ext3 was originally released in September 1999. Written by Stephen Tweedie +for the 2.2 branch, and ported to 2.4 kernels by Peter Braam, Andreas Dilger, +Andrew Morton, Alexander Viro, Ted Ts'o and Stephen Tweedie. + +Ext3 is the ext2 filesystem enhanced with journalling capabilities. The +filesystem is a subset of ext4 filesystem so use ext4 driver for accessing +ext3 filesystems. + diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt deleted file mode 100644 index 58758fbef9e0..000000000000 --- a/Documentation/filesystems/ext3.txt +++ /dev/null @@ -1,12 +0,0 @@ - -Ext3 Filesystem -=============== - -Ext3 was originally released in September 1999. Written by Stephen Tweedie -for the 2.2 branch, and ported to 2.4 kernels by Peter Braam, Andreas Dilger, -Andrew Morton, Alexander Viro, Ted Ts'o and Stephen Tweedie. - -Ext3 is the ext2 filesystem enhanced with journalling capabilities. The -filesystem is a subset of ext4 filesystem so use ext4 driver for accessing -ext3 filesystems. - diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 102b3b65486a..aa2c3d1de3de 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -63,6 +63,7 @@ Documentation for filesystem implementations. efivarfs erofs ext2 + ext3 fuse overlayfs virtiofs -- cgit v1.2.3 From 89272ca1102e000f7dbca724b7b106e688199a5d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:04 +0100 Subject: docs: filesystems: convert f2fs.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/8dd156320b0c015dec6d3f848d03ea057042a15b.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/f2fs.rst | 762 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/f2fs.txt | 730 ---------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 763 insertions(+), 730 deletions(-) create mode 100644 Documentation/filesystems/f2fs.rst delete mode 100644 Documentation/filesystems/f2fs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst new file mode 100644 index 000000000000..d681203728d7 --- /dev/null +++ b/Documentation/filesystems/f2fs.rst @@ -0,0 +1,762 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================================== +WHAT IS Flash-Friendly File System (F2FS)? +========================================== + +NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have +been equipped on a variety systems ranging from mobile to server systems. Since +they are known to have different characteristics from the conventional rotating +disks, a file system, an upper layer to the storage device, should adapt to the +changes from the sketch in the design level. + +F2FS is a file system exploiting NAND flash memory-based storage devices, which +is based on Log-structured File System (LFS). The design has been focused on +addressing the fundamental issues in LFS, which are snowball effect of wandering +tree and high cleaning overhead. + +Since a NAND flash memory-based storage device shows different characteristic +according to its internal geometry or flash memory management scheme, namely FTL, +F2FS and its tools support various parameters not only for configuring on-disk +layout, but also for selecting allocation and cleaning algorithms. + +The following git tree provides the file system formatting tool (mkfs.f2fs), +a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). + +- git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git + +For reporting bugs and sending patches, please use the following mailing list: + +- linux-f2fs-devel@lists.sourceforge.net + +Background and Design issues +============================ + +Log-structured File System (LFS) +-------------------------------- +"A log-structured file system writes all modifications to disk sequentially in +a log-like structure, thereby speeding up both file writing and crash recovery. +The log is the only structure on disk; it contains indexing information so that +files can be read back from the log efficiently. In order to maintain large free +areas on disk for fast writing, we divide the log into segments and use a +segment cleaner to compress the live information from heavily fragmented +segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and +implementation of a log-structured file system", ACM Trans. Computer Systems +10, 1, 26–52. + +Wandering Tree Problem +---------------------- +In LFS, when a file data is updated and written to the end of log, its direct +pointer block is updated due to the changed location. Then the indirect pointer +block is also updated due to the direct pointer block update. In this manner, +the upper index structures such as inode, inode map, and checkpoint block are +also updated recursively. This problem is called as wandering tree problem [1], +and in order to enhance the performance, it should eliminate or relax the update +propagation as much as possible. + +[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/ + +Cleaning Overhead +----------------- +Since LFS is based on out-of-place writes, it produces so many obsolete blocks +scattered across the whole storage. In order to serve new empty log space, it +needs to reclaim these obsolete blocks seamlessly to users. This job is called +as a cleaning process. + +The process consists of three operations as follows. + +1. A victim segment is selected through referencing segment usage table. +2. It loads parent index structures of all the data in the victim identified by + segment summary blocks. +3. It checks the cross-reference between the data and its parent index structure. +4. It moves valid data selectively. + +This cleaning job may cause unexpected long delays, so the most important goal +is to hide the latencies to users. And also definitely, it should reduce the +amount of valid data to be moved, and move them quickly as well. + +Key Features +============ + +Flash Awareness +--------------- +- Enlarge the random write area for better performance, but provide the high + spatial locality +- Align FS data structures to the operational units in FTL as best efforts + +Wandering Tree Problem +---------------------- +- Use a term, “node”, that represents inodes as well as various pointer blocks +- Introduce Node Address Table (NAT) containing the locations of all the “node” + blocks; this will cut off the update propagation. + +Cleaning Overhead +----------------- +- Support a background cleaning process +- Support greedy and cost-benefit algorithms for victim selection policies +- Support multi-head logs for static/dynamic hot and cold data separation +- Introduce adaptive logging for efficient block allocation + +Mount Options +============= + + +====================== ============================================================ +background_gc=%s Turn on/off cleaning operations, namely garbage + collection, triggered in background when I/O subsystem is + idle. If background_gc=on, it will turn on the garbage + collection and if background_gc=off, garbage collection + will be turned off. If background_gc=sync, it will turn + on synchronous garbage collection running in background. + Default value for this option is on. So garbage + collection is on by default. +disable_roll_forward Disable the roll-forward recovery routine +norecovery Disable the roll-forward recovery routine, mounted read- + only (i.e., -o ro,disable_roll_forward) +discard/nodiscard Enable/disable real-time discard in f2fs, if discard is + enabled, f2fs will issue discard/TRIM commands when a + segment is cleaned. +no_heap Disable heap-style segment allocation which finds free + segments for data from the beginning of main area, while + for node from the end of main area. +nouser_xattr Disable Extended User Attributes. Note: xattr is enabled + by default if CONFIG_F2FS_FS_XATTR is selected. +noacl Disable POSIX Access Control List. Note: acl is enabled + by default if CONFIG_F2FS_FS_POSIX_ACL is selected. +active_logs=%u Support configuring the number of active logs. In the + current design, f2fs supports only 2, 4, and 6 logs. + Default number is 6. +disable_ext_identify Disable the extension list configured by mkfs, so f2fs + does not aware of cold files such as media files. +inline_xattr Enable the inline xattrs feature. +noinline_xattr Disable the inline xattrs feature. +inline_xattr_size=%u Support configuring inline xattr size, it depends on + flexible inline xattr feature. +inline_data Enable the inline data feature: New created small(<~3.4k) + files can be written into inode block. +inline_dentry Enable the inline dir feature: data in new created + directory entries can be written into inode block. The + space of inode block which is used to store inline + dentries is limited to ~3.4k. +noinline_dentry Disable the inline dentry feature. +flush_merge Merge concurrent cache_flush commands as much as possible + to eliminate redundant command issues. If the underlying + device handles the cache_flush command relatively slowly, + recommend to enable this option. +nobarrier This option can be used if underlying storage guarantees + its cached data should be written to the novolatile area. + If this option is set, no cache_flush commands are issued + but f2fs still guarantees the write ordering of all the + data writes. +fastboot This option is used when a system wants to reduce mount + time as much as possible, even though normal performance + can be sacrificed. +extent_cache Enable an extent cache based on rb-tree, it can cache + as many as extent which map between contiguous logical + address and physical address per inode, resulting in + increasing the cache hit ratio. Set by default. +noextent_cache Disable an extent cache based on rb-tree explicitly, see + the above extent_cache mount option. +noinline_data Disable the inline data feature, inline data feature is + enabled by default. +data_flush Enable data flushing before checkpoint in order to + persist data of regular and symlink. +reserve_root=%d Support configuring reserved space which is used for + allocation from a privileged user with specified uid or + gid, unit: 4KB, the default limit is 0.2% of user blocks. +resuid=%d The user ID which may use the reserved blocks. +resgid=%d The group ID which may use the reserved blocks. +fault_injection=%d Enable fault injection in all supported types with + specified injection rate. +fault_type=%d Support configuring fault injection type, should be + enabled with fault_injection option, fault type value + is shown below, it supports single or combined type. + + =================== =========== + Type_Name Type_Value + =================== =========== + FAULT_KMALLOC 0x000000001 + FAULT_KVMALLOC 0x000000002 + FAULT_PAGE_ALLOC 0x000000004 + FAULT_PAGE_GET 0x000000008 + FAULT_ALLOC_BIO 0x000000010 + FAULT_ALLOC_NID 0x000000020 + FAULT_ORPHAN 0x000000040 + FAULT_BLOCK 0x000000080 + FAULT_DIR_DEPTH 0x000000100 + FAULT_EVICT_INODE 0x000000200 + FAULT_TRUNCATE 0x000000400 + FAULT_READ_IO 0x000000800 + FAULT_CHECKPOINT 0x000001000 + FAULT_DISCARD 0x000002000 + FAULT_WRITE_IO 0x000004000 + =================== =========== +mode=%s Control block allocation mode which supports "adaptive" + and "lfs". In "lfs" mode, there should be no random + writes towards main area. +io_bits=%u Set the bit size of write IO requests. It should be set + with "mode=lfs". +usrquota Enable plain user disk quota accounting. +grpquota Enable plain group disk quota accounting. +prjquota Enable plain project quota accounting. +usrjquota= Appoint specified file and type during mount, so that quota +grpjquota= information can be properly updated during recovery flow, +prjjquota= : must be in root directory; +jqfmt= : [vfsold,vfsv0,vfsv1]. +offusrjquota Turn off user journelled quota. +offgrpjquota Turn off group journelled quota. +offprjjquota Turn off project journelled quota. +quota Enable plain user disk quota accounting. +noquota Disable all plain disk quota option. +whint_mode=%s Control which write hints are passed down to block + layer. This supports "off", "user-based", and + "fs-based". In "off" mode (default), f2fs does not pass + down hints. In "user-based" mode, f2fs tries to pass + down hints given by users. And in "fs-based" mode, f2fs + passes down hints with its policy. +alloc_mode=%s Adjust block allocation policy, which supports "reuse" + and "default". +fsync_mode=%s Control the policy of fsync. Currently supports "posix", + "strict", and "nobarrier". In "posix" mode, which is + default, fsync will follow POSIX semantics and does a + light operation to improve the filesystem performance. + In "strict" mode, fsync will be heavy and behaves in line + with xfs, ext4 and btrfs, where xfstest generic/342 will + pass, but the performance will regress. "nobarrier" is + based on "posix", but doesn't issue flush command for + non-atomic files likewise "nobarrier" mount option. +test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt + context. The fake fscrypt context is used by xfstests. +checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enable" + to reenable checkpointing. Is enabled by default. While + disabled, any unmounting or unexpected shutdowns will cause + the filesystem contents to appear as they did when the + filesystem was mounted with that option. + While mounting with checkpoint=disabled, the filesystem must + run garbage collection to ensure that all available space can + be used. If this takes too much time, the mount may return + EAGAIN. You may optionally add a value to indicate how much + of the disk you would be willing to temporarily give up to + avoid additional garbage collection. This can be given as a + number of blocks, or as a percent. For instance, mounting + with checkpoint=disable:100% would always succeed, but it may + hide up to all remaining free space. The actual space that + would be unusable can be viewed at /sys/fs/f2fs//unusable + This space is reclaimed once checkpoint=enable. +compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo" + and "lz4" algorithm. +compress_log_size=%u Support configuring compress cluster size, the size will + be 4KB * (1 << %u), 16KB is minimum size, also it's + default size. +compress_extension=%s Support adding specified extension, so that f2fs can enable + compression on those corresponding files, e.g. if all files + with '.ext' has high compression rate, we can set the '.ext' + on compression extension list and enable compression on + these file by default rather than to enable it via ioctl. + For other files, we can still enable compression via ioctl. +====================== ============================================================ + +Debugfs Entries +=============== + +/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as +f2fs. Each file shows the whole f2fs information. + +/sys/kernel/debug/f2fs/status includes: + + - major file system information managed by f2fs currently + - average SIT information about whole segments + - current memory footprint consumed by f2fs. + +Sysfs Entries +============= + +Information about mounted f2fs file systems can be found in +/sys/fs/f2fs. Each mounted filesystem will have a directory in +/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda). +The files in each per-device directory are shown in table below. + +Files in /sys/fs/f2fs/ +(see also Documentation/ABI/testing/sysfs-fs-f2fs) + +Usage +===== + +1. Download userland tools and compile them. + +2. Skip, if f2fs was compiled statically inside kernel. + Otherwise, insert the f2fs.ko module:: + + # insmod f2fs.ko + +3. Create a directory trying to mount:: + + # mkdir /mnt/f2fs + +4. Format the block device, and then mount as f2fs:: + + # mkfs.f2fs -l label /dev/block_device + # mount -t f2fs /dev/block_device /mnt/f2fs + +mkfs.f2fs +--------- +The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem, +which builds a basic on-disk layout. + +The options consist of: + +=============== =========================================================== +``-l [label]`` Give a volume label, up to 512 unicode name. +``-a [0 or 1]`` Split start location of each area for heap-based allocation. + + 1 is set by default, which performs this. +``-o [int]`` Set overprovision ratio in percent over volume size. + + 5 is set by default. +``-s [int]`` Set the number of segments per section. + + 1 is set by default. +``-z [int]`` Set the number of sections per zone. + + 1 is set by default. +``-e [str]`` Set basic extension list. e.g. "mp3,gif,mov" +``-t [0 or 1]`` Disable discard command or not. + + 1 is set by default, which conducts discard. +=============== =========================================================== + +fsck.f2fs +--------- +The fsck.f2fs is a tool to check the consistency of an f2fs-formatted +partition, which examines whether the filesystem metadata and user-made data +are cross-referenced correctly or not. +Note that, initial version of the tool does not fix any inconsistency. + +The options consist of:: + + -d debug level [default:0] + +dump.f2fs +--------- +The dump.f2fs shows the information of specific inode and dumps SSA and SIT to +file. Each file is dump_ssa and dump_sit. + +The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. +It shows on-disk inode information recognized by a given inode number, and is +able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and +./dump_sit respectively. + +The options consist of:: + + -d debug level [default:0] + -i inode no (hex) + -s [SIT dump segno from #1~#2 (decimal), for all 0~-1] + -a [SSA dump segno from #1~#2 (decimal), for all 0~-1] + +Examples:: + + # dump.f2fs -i [ino] /dev/sdx + # dump.f2fs -s 0~-1 /dev/sdx (SIT dump) + # dump.f2fs -a 0~-1 /dev/sdx (SSA dump) + +Design +====== + +On-disk Layout +-------------- + +F2FS divides the whole volume into a number of segments, each of which is fixed +to 2MB in size. A section is composed of consecutive segments, and a zone +consists of a set of sections. By default, section and zone sizes are set to one +segment size identically, but users can easily modify the sizes by mkfs. + +F2FS splits the entire volume into six areas, and all the areas except superblock +consists of multiple segments as described below:: + + align with the zone size <-| + |-> align with the segment size + _________________________________________________________________________ + | | | Segment | Node | Segment | | + | Superblock | Checkpoint | Info. | Address | Summary | Main | + | (SB) | (CP) | Table (SIT) | Table (NAT) | Area (SSA) | | + |____________|_____2______|______N______|______N______|______N_____|__N___| + . . + . . + . . + ._________________________________________. + |_Segment_|_..._|_Segment_|_..._|_Segment_| + . . + ._________._________ + |_section_|__...__|_ + . . + .________. + |__zone__| + +- Superblock (SB) + It is located at the beginning of the partition, and there exist two copies + to avoid file system crash. It contains basic partition information and some + default parameters of f2fs. + +- Checkpoint (CP) + It contains file system information, bitmaps for valid NAT/SIT sets, orphan + inode lists, and summary entries of current active segments. + +- Segment Information Table (SIT) + It contains segment information such as valid block count and bitmap for the + validity of all the blocks. + +- Node Address Table (NAT) + It is composed of a block address table for all the node blocks stored in + Main area. + +- Segment Summary Area (SSA) + It contains summary entries which contains the owner information of all the + data and node blocks stored in Main area. + +- Main Area + It contains file and directory data including their indices. + +In order to avoid misalignment between file system and flash-based storage, F2FS +aligns the start block address of CP with the segment size. Also, it aligns the +start block address of Main area with the zone size by reserving some segments +in SSA area. + +Reference the following survey for additional technical details. +https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey + +File System Metadata Structure +------------------------------ + +F2FS adopts the checkpointing scheme to maintain file system consistency. At +mount time, F2FS first tries to find the last valid checkpoint data by scanning +CP area. In order to reduce the scanning time, F2FS uses only two copies of CP. +One of them always indicates the last valid data, which is called as shadow copy +mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism. + +For file system consistency, each CP points to which NAT and SIT copies are +valid, as shown as below:: + + +--------+----------+---------+ + | CP | SIT | NAT | + +--------+----------+---------+ + . . . . + . . . . + . . . . + +-------+-------+--------+--------+--------+--------+ + | CP #0 | CP #1 | SIT #0 | SIT #1 | NAT #0 | NAT #1 | + +-------+-------+--------+--------+--------+--------+ + | ^ ^ + | | | + `----------------------------------------' + +Index Structure +--------------- + +The key data structure to manage the data locations is a "node". Similar to +traditional file structures, F2FS has three types of node: inode, direct node, +indirect node. F2FS assigns 4KB to an inode block which contains 923 data block +indices, two direct node pointers, two indirect node pointers, and one double +indirect node pointer as described below. One direct node block contains 1018 +data blocks, and one indirect node block contains also 1018 node blocks. Thus, +one inode block (i.e., a file) covers:: + + 4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB. + + Inode block (4KB) + |- data (923) + |- direct node (2) + | `- data (1018) + |- indirect node (2) + | `- direct node (1018) + | `- data (1018) + `- double indirect node (1) + `- indirect node (1018) + `- direct node (1018) + `- data (1018) + +Note that, all the node blocks are mapped by NAT which means the location of +each node is translated by the NAT table. In the consideration of the wandering +tree problem, F2FS is able to cut off the propagation of node updates caused by +leaf data writes. + +Directory Structure +------------------- + +A directory entry occupies 11 bytes, which consists of the following attributes. + +- hash hash value of the file name +- ino inode number +- len the length of file name +- type file type such as directory, symlink, etc + +A dentry block consists of 214 dentry slots and file names. Therein a bitmap is +used to represent whether each dentry is valid or not. A dentry block occupies +4KB with the following composition. + +:: + + Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) + + dentries(11 * 214 bytes) + file name (8 * 214 bytes) + + [Bucket] + +--------------------------------+ + |dentry block 1 | dentry block 2 | + +--------------------------------+ + . . + . . + . [Dentry Block Structure: 4KB] . + +--------+----------+----------+------------+ + | bitmap | reserved | dentries | file names | + +--------+----------+----------+------------+ + [Dentry Block: 4KB] . . + . . + . . + +------+------+-----+------+ + | hash | ino | len | type | + +------+------+-----+------+ + [Dentry Structure: 11 bytes] + +F2FS implements multi-level hash tables for directory structure. Each level has +a hash table with dedicated number of hash buckets as shown below. Note that +"A(2B)" means a bucket includes 2 data blocks. + +:: + + ---------------------- + A : bucket + B : block + N : MAX_DIR_HASH_DEPTH + ---------------------- + + level #0 | A(2B) + | + level #1 | A(2B) - A(2B) + | + level #2 | A(2B) - A(2B) - A(2B) - A(2B) + . | . . . . + level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) + . | . . . . + level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) + +The number of blocks and buckets are determined by:: + + ,- 2, if n < MAX_DIR_HASH_DEPTH / 2, + # of blocks in level #n = | + `- 4, Otherwise + + ,- 2^(n + dir_level), + | if n + dir_level < MAX_DIR_HASH_DEPTH / 2, + # of buckets in level #n = | + `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), + Otherwise + +When F2FS finds a file name in a directory, at first a hash value of the file +name is calculated. Then, F2FS scans the hash table in level #0 to find the +dentry consisting of the file name and its inode number. If not found, F2FS +scans the next hash table in level #1. In this way, F2FS scans hash tables in +each levels incrementally from 1 to N. In each levels F2FS needs to scan only +one bucket determined by the following equation, which shows O(log(# of files)) +complexity:: + + bucket number to scan in level #n = (hash value) % (# of buckets in level #n) + +In the case of file creation, F2FS finds empty consecutive slots that cover the +file name. F2FS searches the empty slots in the hash tables of whole levels from +1 to N in the same way as the lookup operation. + +The following figure shows an example of two cases holding children:: + + --------------> Dir <-------------- + | | + child child + + child - child [hole] - child + + child - child - child [hole] - [hole] - child + + Case 1: Case 2: + Number of children = 6, Number of children = 3, + File size = 7 File size = 7 + +Default Block Allocation +------------------------ + +At runtime, F2FS manages six active logs inside "Main" area: Hot/Warm/Cold node +and Hot/Warm/Cold data. + +- Hot node contains direct node blocks of directories. +- Warm node contains direct node blocks except hot node blocks. +- Cold node contains indirect node blocks +- Hot data contains dentry blocks +- Warm data contains data blocks except hot and cold data blocks +- Cold data contains multimedia data or migrated data blocks + +LFS has two schemes for free space management: threaded log and copy-and-compac- +tion. The copy-and-compaction scheme which is known as cleaning, is well-suited +for devices showing very good sequential write performance, since free segments +are served all the time for writing new data. However, it suffers from cleaning +overhead under high utilization. Contrarily, the threaded log scheme suffers +from random writes, but no cleaning process is needed. F2FS adopts a hybrid +scheme where the copy-and-compaction scheme is adopted by default, but the +policy is dynamically changed to the threaded log scheme according to the file +system status. + +In order to align F2FS with underlying flash-based storage, F2FS allocates a +segment in a unit of section. F2FS expects that the section size would be the +same as the unit size of garbage collection in FTL. Furthermore, with respect +to the mapping granularity in FTL, F2FS allocates each section of the active +logs from different zones as much as possible, since FTL can write the data in +the active logs into one allocation unit according to its mapping granularity. + +Cleaning process +---------------- + +F2FS does cleaning both on demand and in the background. On-demand cleaning is +triggered when there are not enough free segments to serve VFS calls. Background +cleaner is operated by a kernel thread, and triggers the cleaning job when the +system is idle. + +F2FS supports two victim selection policies: greedy and cost-benefit algorithms. +In the greedy algorithm, F2FS selects a victim segment having the smallest number +of valid blocks. In the cost-benefit algorithm, F2FS selects a victim segment +according to the segment age and the number of valid blocks in order to address +log block thrashing problem in the greedy algorithm. F2FS adopts the greedy +algorithm for on-demand cleaner, while background cleaner adopts cost-benefit +algorithm. + +In order to identify whether the data in the victim segment are valid or not, +F2FS manages a bitmap. Each bit represents the validity of a block, and the +bitmap is composed of a bit stream covering whole blocks in main area. + +Write-hint Policy +----------------- + +1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. + +2) whint_mode=user-based. F2FS tries to pass down hints given by +users. + +===================== ======================== =================== +User F2FS Block +===================== ======================== =================== + META WRITE_LIFE_NOT_SET + HOT_NODE " + WARM_NODE " + COLD_NODE " +ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME +extension list " " + +-- buffered io +WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME +WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT +WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET +WRITE_LIFE_NONE " " +WRITE_LIFE_MEDIUM " " +WRITE_LIFE_LONG " " + +-- direct io +WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME +WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT +WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET +WRITE_LIFE_NONE " WRITE_LIFE_NONE +WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM +WRITE_LIFE_LONG " WRITE_LIFE_LONG +===================== ======================== =================== + +3) whint_mode=fs-based. F2FS passes down hints with its policy. + +===================== ======================== =================== +User F2FS Block +===================== ======================== =================== + META WRITE_LIFE_MEDIUM; + HOT_NODE WRITE_LIFE_NOT_SET + WARM_NODE " + COLD_NODE WRITE_LIFE_NONE +ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME +extension list " " + +-- buffered io +WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME +WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT +WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG +WRITE_LIFE_NONE " " +WRITE_LIFE_MEDIUM " " +WRITE_LIFE_LONG " " + +-- direct io +WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME +WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT +WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET +WRITE_LIFE_NONE " WRITE_LIFE_NONE +WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM +WRITE_LIFE_LONG " WRITE_LIFE_LONG +===================== ======================== =================== + +Fallocate(2) Policy +------------------- + +The default policy follows the below posix rule. + +Allocating disk space + The default operation (i.e., mode is zero) of fallocate() allocates + the disk space within the range specified by offset and len. The + file size (as reported by stat(2)) will be changed if offset+len is + greater than the file size. Any subregion within the range specified + by offset and len that did not contain data before the call will be + initialized to zero. This default behavior closely resembles the + behavior of the posix_fallocate(3) library function, and is intended + as a method of optimally implementing that function. + +However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to +fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addressess having +zero or random data, which is useful to the below scenario where: + + 1. create(fd) + 2. ioctl(fd, F2FS_IOC_SET_PIN_FILE) + 3. fallocate(fd, 0, 0, size) + 4. address = fibmap(fd, offset) + 5. open(blkdev) + 6. write(blkdev, address) + +Compression implementation +-------------------------- + +- New term named cluster is defined as basic unit of compression, file can + be divided into multiple clusters logically. One cluster includes 4 << n + (n >= 0) logical pages, compression size is also cluster size, each of + cluster can be compressed or not. + +- In cluster metadata layout, one special block address is used to indicate + cluster is compressed one or normal one, for compressed cluster, following + metadata maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs + stores data including compress header and compressed data. + +- In order to eliminate write amplification during overwrite, F2FS only + support compression on write-once file, data can be compressed only when + all logical blocks in file are valid and cluster compress ratio is lower + than specified threshold. + +- To enable compression on regular inode, there are three ways: + + * chattr +c file + * chattr +c dir; touch dir/file + * mount w/ -o compress_extension=ext; touch file.ext + +Compress metadata layout:: + + [Dnode Structure] + +-----------------------------------------------+ + | cluster 1 | cluster 2 | ......... | cluster N | + +-----------------------------------------------+ + . . . . + . . . . + . Compressed Cluster . . Normal Cluster . + +----------+---------+---------+---------+ +---------+---------+---------+---------+ + |compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 | + +----------+---------+---------+---------+ +---------+---------+---------+---------+ + . . + . . + . . + +-------------+-------------+----------+----------------------------+ + | data length | data chksum | reserved | compressed data | + +-------------+-------------+----------+----------------------------+ diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt deleted file mode 100644 index 4eb3e2ddd00e..000000000000 --- a/Documentation/filesystems/f2fs.txt +++ /dev/null @@ -1,730 +0,0 @@ -================================================================================ -WHAT IS Flash-Friendly File System (F2FS)? -================================================================================ - -NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have -been equipped on a variety systems ranging from mobile to server systems. Since -they are known to have different characteristics from the conventional rotating -disks, a file system, an upper layer to the storage device, should adapt to the -changes from the sketch in the design level. - -F2FS is a file system exploiting NAND flash memory-based storage devices, which -is based on Log-structured File System (LFS). The design has been focused on -addressing the fundamental issues in LFS, which are snowball effect of wandering -tree and high cleaning overhead. - -Since a NAND flash memory-based storage device shows different characteristic -according to its internal geometry or flash memory management scheme, namely FTL, -F2FS and its tools support various parameters not only for configuring on-disk -layout, but also for selecting allocation and cleaning algorithms. - -The following git tree provides the file system formatting tool (mkfs.f2fs), -a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). ->> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git - -For reporting bugs and sending patches, please use the following mailing list: ->> linux-f2fs-devel@lists.sourceforge.net - -================================================================================ -BACKGROUND AND DESIGN ISSUES -================================================================================ - -Log-structured File System (LFS) --------------------------------- -"A log-structured file system writes all modifications to disk sequentially in -a log-like structure, thereby speeding up both file writing and crash recovery. -The log is the only structure on disk; it contains indexing information so that -files can be read back from the log efficiently. In order to maintain large free -areas on disk for fast writing, we divide the log into segments and use a -segment cleaner to compress the live information from heavily fragmented -segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and -implementation of a log-structured file system", ACM Trans. Computer Systems -10, 1, 26–52. - -Wandering Tree Problem ----------------------- -In LFS, when a file data is updated and written to the end of log, its direct -pointer block is updated due to the changed location. Then the indirect pointer -block is also updated due to the direct pointer block update. In this manner, -the upper index structures such as inode, inode map, and checkpoint block are -also updated recursively. This problem is called as wandering tree problem [1], -and in order to enhance the performance, it should eliminate or relax the update -propagation as much as possible. - -[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/ - -Cleaning Overhead ------------------ -Since LFS is based on out-of-place writes, it produces so many obsolete blocks -scattered across the whole storage. In order to serve new empty log space, it -needs to reclaim these obsolete blocks seamlessly to users. This job is called -as a cleaning process. - -The process consists of three operations as follows. -1. A victim segment is selected through referencing segment usage table. -2. It loads parent index structures of all the data in the victim identified by - segment summary blocks. -3. It checks the cross-reference between the data and its parent index structure. -4. It moves valid data selectively. - -This cleaning job may cause unexpected long delays, so the most important goal -is to hide the latencies to users. And also definitely, it should reduce the -amount of valid data to be moved, and move them quickly as well. - -================================================================================ -KEY FEATURES -================================================================================ - -Flash Awareness ---------------- -- Enlarge the random write area for better performance, but provide the high - spatial locality -- Align FS data structures to the operational units in FTL as best efforts - -Wandering Tree Problem ----------------------- -- Use a term, “node”, that represents inodes as well as various pointer blocks -- Introduce Node Address Table (NAT) containing the locations of all the “node” - blocks; this will cut off the update propagation. - -Cleaning Overhead ------------------ -- Support a background cleaning process -- Support greedy and cost-benefit algorithms for victim selection policies -- Support multi-head logs for static/dynamic hot and cold data separation -- Introduce adaptive logging for efficient block allocation - -================================================================================ -MOUNT OPTIONS -================================================================================ - -background_gc=%s Turn on/off cleaning operations, namely garbage - collection, triggered in background when I/O subsystem is - idle. If background_gc=on, it will turn on the garbage - collection and if background_gc=off, garbage collection - will be turned off. If background_gc=sync, it will turn - on synchronous garbage collection running in background. - Default value for this option is on. So garbage - collection is on by default. -disable_roll_forward Disable the roll-forward recovery routine -norecovery Disable the roll-forward recovery routine, mounted read- - only (i.e., -o ro,disable_roll_forward) -discard/nodiscard Enable/disable real-time discard in f2fs, if discard is - enabled, f2fs will issue discard/TRIM commands when a - segment is cleaned. -no_heap Disable heap-style segment allocation which finds free - segments for data from the beginning of main area, while - for node from the end of main area. -nouser_xattr Disable Extended User Attributes. Note: xattr is enabled - by default if CONFIG_F2FS_FS_XATTR is selected. -noacl Disable POSIX Access Control List. Note: acl is enabled - by default if CONFIG_F2FS_FS_POSIX_ACL is selected. -active_logs=%u Support configuring the number of active logs. In the - current design, f2fs supports only 2, 4, and 6 logs. - Default number is 6. -disable_ext_identify Disable the extension list configured by mkfs, so f2fs - does not aware of cold files such as media files. -inline_xattr Enable the inline xattrs feature. -noinline_xattr Disable the inline xattrs feature. -inline_xattr_size=%u Support configuring inline xattr size, it depends on - flexible inline xattr feature. -inline_data Enable the inline data feature: New created small(<~3.4k) - files can be written into inode block. -inline_dentry Enable the inline dir feature: data in new created - directory entries can be written into inode block. The - space of inode block which is used to store inline - dentries is limited to ~3.4k. -noinline_dentry Disable the inline dentry feature. -flush_merge Merge concurrent cache_flush commands as much as possible - to eliminate redundant command issues. If the underlying - device handles the cache_flush command relatively slowly, - recommend to enable this option. -nobarrier This option can be used if underlying storage guarantees - its cached data should be written to the novolatile area. - If this option is set, no cache_flush commands are issued - but f2fs still guarantees the write ordering of all the - data writes. -fastboot This option is used when a system wants to reduce mount - time as much as possible, even though normal performance - can be sacrificed. -extent_cache Enable an extent cache based on rb-tree, it can cache - as many as extent which map between contiguous logical - address and physical address per inode, resulting in - increasing the cache hit ratio. Set by default. -noextent_cache Disable an extent cache based on rb-tree explicitly, see - the above extent_cache mount option. -noinline_data Disable the inline data feature, inline data feature is - enabled by default. -data_flush Enable data flushing before checkpoint in order to - persist data of regular and symlink. -reserve_root=%d Support configuring reserved space which is used for - allocation from a privileged user with specified uid or - gid, unit: 4KB, the default limit is 0.2% of user blocks. -resuid=%d The user ID which may use the reserved blocks. -resgid=%d The group ID which may use the reserved blocks. -fault_injection=%d Enable fault injection in all supported types with - specified injection rate. -fault_type=%d Support configuring fault injection type, should be - enabled with fault_injection option, fault type value - is shown below, it supports single or combined type. - Type_Name Type_Value - FAULT_KMALLOC 0x000000001 - FAULT_KVMALLOC 0x000000002 - FAULT_PAGE_ALLOC 0x000000004 - FAULT_PAGE_GET 0x000000008 - FAULT_ALLOC_BIO 0x000000010 - FAULT_ALLOC_NID 0x000000020 - FAULT_ORPHAN 0x000000040 - FAULT_BLOCK 0x000000080 - FAULT_DIR_DEPTH 0x000000100 - FAULT_EVICT_INODE 0x000000200 - FAULT_TRUNCATE 0x000000400 - FAULT_READ_IO 0x000000800 - FAULT_CHECKPOINT 0x000001000 - FAULT_DISCARD 0x000002000 - FAULT_WRITE_IO 0x000004000 -mode=%s Control block allocation mode which supports "adaptive" - and "lfs". In "lfs" mode, there should be no random - writes towards main area. -io_bits=%u Set the bit size of write IO requests. It should be set - with "mode=lfs". -usrquota Enable plain user disk quota accounting. -grpquota Enable plain group disk quota accounting. -prjquota Enable plain project quota accounting. -usrjquota= Appoint specified file and type during mount, so that quota -grpjquota= information can be properly updated during recovery flow, -prjjquota= : must be in root directory; -jqfmt= : [vfsold,vfsv0,vfsv1]. -offusrjquota Turn off user journelled quota. -offgrpjquota Turn off group journelled quota. -offprjjquota Turn off project journelled quota. -quota Enable plain user disk quota accounting. -noquota Disable all plain disk quota option. -whint_mode=%s Control which write hints are passed down to block - layer. This supports "off", "user-based", and - "fs-based". In "off" mode (default), f2fs does not pass - down hints. In "user-based" mode, f2fs tries to pass - down hints given by users. And in "fs-based" mode, f2fs - passes down hints with its policy. -alloc_mode=%s Adjust block allocation policy, which supports "reuse" - and "default". -fsync_mode=%s Control the policy of fsync. Currently supports "posix", - "strict", and "nobarrier". In "posix" mode, which is - default, fsync will follow POSIX semantics and does a - light operation to improve the filesystem performance. - In "strict" mode, fsync will be heavy and behaves in line - with xfs, ext4 and btrfs, where xfstest generic/342 will - pass, but the performance will regress. "nobarrier" is - based on "posix", but doesn't issue flush command for - non-atomic files likewise "nobarrier" mount option. -test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt - context. The fake fscrypt context is used by xfstests. -checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enable" - to reenable checkpointing. Is enabled by default. While - disabled, any unmounting or unexpected shutdowns will cause - the filesystem contents to appear as they did when the - filesystem was mounted with that option. - While mounting with checkpoint=disabled, the filesystem must - run garbage collection to ensure that all available space can - be used. If this takes too much time, the mount may return - EAGAIN. You may optionally add a value to indicate how much - of the disk you would be willing to temporarily give up to - avoid additional garbage collection. This can be given as a - number of blocks, or as a percent. For instance, mounting - with checkpoint=disable:100% would always succeed, but it may - hide up to all remaining free space. The actual space that - would be unusable can be viewed at /sys/fs/f2fs//unusable - This space is reclaimed once checkpoint=enable. -compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo" - and "lz4" algorithm. -compress_log_size=%u Support configuring compress cluster size, the size will - be 4KB * (1 << %u), 16KB is minimum size, also it's - default size. -compress_extension=%s Support adding specified extension, so that f2fs can enable - compression on those corresponding files, e.g. if all files - with '.ext' has high compression rate, we can set the '.ext' - on compression extension list and enable compression on - these file by default rather than to enable it via ioctl. - For other files, we can still enable compression via ioctl. - -================================================================================ -DEBUGFS ENTRIES -================================================================================ - -/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as -f2fs. Each file shows the whole f2fs information. - -/sys/kernel/debug/f2fs/status includes: - - major file system information managed by f2fs currently - - average SIT information about whole segments - - current memory footprint consumed by f2fs. - -================================================================================ -SYSFS ENTRIES -================================================================================ - -Information about mounted f2fs file systems can be found in -/sys/fs/f2fs. Each mounted filesystem will have a directory in -/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda). -The files in each per-device directory are shown in table below. - -Files in /sys/fs/f2fs/ -(see also Documentation/ABI/testing/sysfs-fs-f2fs) - -================================================================================ -USAGE -================================================================================ - -1. Download userland tools and compile them. - -2. Skip, if f2fs was compiled statically inside kernel. - Otherwise, insert the f2fs.ko module. - # insmod f2fs.ko - -3. Create a directory trying to mount - # mkdir /mnt/f2fs - -4. Format the block device, and then mount as f2fs - # mkfs.f2fs -l label /dev/block_device - # mount -t f2fs /dev/block_device /mnt/f2fs - -mkfs.f2fs ---------- -The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem, -which builds a basic on-disk layout. - -The options consist of: --l [label] : Give a volume label, up to 512 unicode name. --a [0 or 1] : Split start location of each area for heap-based allocation. - 1 is set by default, which performs this. --o [int] : Set overprovision ratio in percent over volume size. - 5 is set by default. --s [int] : Set the number of segments per section. - 1 is set by default. --z [int] : Set the number of sections per zone. - 1 is set by default. --e [str] : Set basic extension list. e.g. "mp3,gif,mov" --t [0 or 1] : Disable discard command or not. - 1 is set by default, which conducts discard. - -fsck.f2fs ---------- -The fsck.f2fs is a tool to check the consistency of an f2fs-formatted -partition, which examines whether the filesystem metadata and user-made data -are cross-referenced correctly or not. -Note that, initial version of the tool does not fix any inconsistency. - -The options consist of: - -d debug level [default:0] - -dump.f2fs ---------- -The dump.f2fs shows the information of specific inode and dumps SSA and SIT to -file. Each file is dump_ssa and dump_sit. - -The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem. -It shows on-disk inode information recognized by a given inode number, and is -able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and -./dump_sit respectively. - -The options consist of: - -d debug level [default:0] - -i inode no (hex) - -s [SIT dump segno from #1~#2 (decimal), for all 0~-1] - -a [SSA dump segno from #1~#2 (decimal), for all 0~-1] - -Examples: -# dump.f2fs -i [ino] /dev/sdx -# dump.f2fs -s 0~-1 /dev/sdx (SIT dump) -# dump.f2fs -a 0~-1 /dev/sdx (SSA dump) - -================================================================================ -DESIGN -================================================================================ - -On-disk Layout --------------- - -F2FS divides the whole volume into a number of segments, each of which is fixed -to 2MB in size. A section is composed of consecutive segments, and a zone -consists of a set of sections. By default, section and zone sizes are set to one -segment size identically, but users can easily modify the sizes by mkfs. - -F2FS splits the entire volume into six areas, and all the areas except superblock -consists of multiple segments as described below. - - align with the zone size <-| - |-> align with the segment size - _________________________________________________________________________ - | | | Segment | Node | Segment | | - | Superblock | Checkpoint | Info. | Address | Summary | Main | - | (SB) | (CP) | Table (SIT) | Table (NAT) | Area (SSA) | | - |____________|_____2______|______N______|______N______|______N_____|__N___| - . . - . . - . . - ._________________________________________. - |_Segment_|_..._|_Segment_|_..._|_Segment_| - . . - ._________._________ - |_section_|__...__|_ - . . - .________. - |__zone__| - -- Superblock (SB) - : It is located at the beginning of the partition, and there exist two copies - to avoid file system crash. It contains basic partition information and some - default parameters of f2fs. - -- Checkpoint (CP) - : It contains file system information, bitmaps for valid NAT/SIT sets, orphan - inode lists, and summary entries of current active segments. - -- Segment Information Table (SIT) - : It contains segment information such as valid block count and bitmap for the - validity of all the blocks. - -- Node Address Table (NAT) - : It is composed of a block address table for all the node blocks stored in - Main area. - -- Segment Summary Area (SSA) - : It contains summary entries which contains the owner information of all the - data and node blocks stored in Main area. - -- Main Area - : It contains file and directory data including their indices. - -In order to avoid misalignment between file system and flash-based storage, F2FS -aligns the start block address of CP with the segment size. Also, it aligns the -start block address of Main area with the zone size by reserving some segments -in SSA area. - -Reference the following survey for additional technical details. -https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey - -File System Metadata Structure ------------------------------- - -F2FS adopts the checkpointing scheme to maintain file system consistency. At -mount time, F2FS first tries to find the last valid checkpoint data by scanning -CP area. In order to reduce the scanning time, F2FS uses only two copies of CP. -One of them always indicates the last valid data, which is called as shadow copy -mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism. - -For file system consistency, each CP points to which NAT and SIT copies are -valid, as shown as below. - - +--------+----------+---------+ - | CP | SIT | NAT | - +--------+----------+---------+ - . . . . - . . . . - . . . . - +-------+-------+--------+--------+--------+--------+ - | CP #0 | CP #1 | SIT #0 | SIT #1 | NAT #0 | NAT #1 | - +-------+-------+--------+--------+--------+--------+ - | ^ ^ - | | | - `----------------------------------------' - -Index Structure ---------------- - -The key data structure to manage the data locations is a "node". Similar to -traditional file structures, F2FS has three types of node: inode, direct node, -indirect node. F2FS assigns 4KB to an inode block which contains 923 data block -indices, two direct node pointers, two indirect node pointers, and one double -indirect node pointer as described below. One direct node block contains 1018 -data blocks, and one indirect node block contains also 1018 node blocks. Thus, -one inode block (i.e., a file) covers: - - 4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB. - - Inode block (4KB) - |- data (923) - |- direct node (2) - | `- data (1018) - |- indirect node (2) - | `- direct node (1018) - | `- data (1018) - `- double indirect node (1) - `- indirect node (1018) - `- direct node (1018) - `- data (1018) - -Note that, all the node blocks are mapped by NAT which means the location of -each node is translated by the NAT table. In the consideration of the wandering -tree problem, F2FS is able to cut off the propagation of node updates caused by -leaf data writes. - -Directory Structure -------------------- - -A directory entry occupies 11 bytes, which consists of the following attributes. - -- hash hash value of the file name -- ino inode number -- len the length of file name -- type file type such as directory, symlink, etc - -A dentry block consists of 214 dentry slots and file names. Therein a bitmap is -used to represent whether each dentry is valid or not. A dentry block occupies -4KB with the following composition. - - Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) + - dentries(11 * 214 bytes) + file name (8 * 214 bytes) - - [Bucket] - +--------------------------------+ - |dentry block 1 | dentry block 2 | - +--------------------------------+ - . . - . . - . [Dentry Block Structure: 4KB] . - +--------+----------+----------+------------+ - | bitmap | reserved | dentries | file names | - +--------+----------+----------+------------+ - [Dentry Block: 4KB] . . - . . - . . - +------+------+-----+------+ - | hash | ino | len | type | - +------+------+-----+------+ - [Dentry Structure: 11 bytes] - -F2FS implements multi-level hash tables for directory structure. Each level has -a hash table with dedicated number of hash buckets as shown below. Note that -"A(2B)" means a bucket includes 2 data blocks. - ----------------------- -A : bucket -B : block -N : MAX_DIR_HASH_DEPTH ----------------------- - -level #0 | A(2B) - | -level #1 | A(2B) - A(2B) - | -level #2 | A(2B) - A(2B) - A(2B) - A(2B) - . | . . . . -level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) - . | . . . . -level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) - -The number of blocks and buckets are determined by, - - ,- 2, if n < MAX_DIR_HASH_DEPTH / 2, - # of blocks in level #n = | - `- 4, Otherwise - - ,- 2^(n + dir_level), - | if n + dir_level < MAX_DIR_HASH_DEPTH / 2, - # of buckets in level #n = | - `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), - Otherwise - -When F2FS finds a file name in a directory, at first a hash value of the file -name is calculated. Then, F2FS scans the hash table in level #0 to find the -dentry consisting of the file name and its inode number. If not found, F2FS -scans the next hash table in level #1. In this way, F2FS scans hash tables in -each levels incrementally from 1 to N. In each levels F2FS needs to scan only -one bucket determined by the following equation, which shows O(log(# of files)) -complexity. - - bucket number to scan in level #n = (hash value) % (# of buckets in level #n) - -In the case of file creation, F2FS finds empty consecutive slots that cover the -file name. F2FS searches the empty slots in the hash tables of whole levels from -1 to N in the same way as the lookup operation. - -The following figure shows an example of two cases holding children. - --------------> Dir <-------------- - | | - child child - - child - child [hole] - child - - child - child - child [hole] - [hole] - child - - Case 1: Case 2: - Number of children = 6, Number of children = 3, - File size = 7 File size = 7 - -Default Block Allocation ------------------------- - -At runtime, F2FS manages six active logs inside "Main" area: Hot/Warm/Cold node -and Hot/Warm/Cold data. - -- Hot node contains direct node blocks of directories. -- Warm node contains direct node blocks except hot node blocks. -- Cold node contains indirect node blocks -- Hot data contains dentry blocks -- Warm data contains data blocks except hot and cold data blocks -- Cold data contains multimedia data or migrated data blocks - -LFS has two schemes for free space management: threaded log and copy-and-compac- -tion. The copy-and-compaction scheme which is known as cleaning, is well-suited -for devices showing very good sequential write performance, since free segments -are served all the time for writing new data. However, it suffers from cleaning -overhead under high utilization. Contrarily, the threaded log scheme suffers -from random writes, but no cleaning process is needed. F2FS adopts a hybrid -scheme where the copy-and-compaction scheme is adopted by default, but the -policy is dynamically changed to the threaded log scheme according to the file -system status. - -In order to align F2FS with underlying flash-based storage, F2FS allocates a -segment in a unit of section. F2FS expects that the section size would be the -same as the unit size of garbage collection in FTL. Furthermore, with respect -to the mapping granularity in FTL, F2FS allocates each section of the active -logs from different zones as much as possible, since FTL can write the data in -the active logs into one allocation unit according to its mapping granularity. - -Cleaning process ----------------- - -F2FS does cleaning both on demand and in the background. On-demand cleaning is -triggered when there are not enough free segments to serve VFS calls. Background -cleaner is operated by a kernel thread, and triggers the cleaning job when the -system is idle. - -F2FS supports two victim selection policies: greedy and cost-benefit algorithms. -In the greedy algorithm, F2FS selects a victim segment having the smallest number -of valid blocks. In the cost-benefit algorithm, F2FS selects a victim segment -according to the segment age and the number of valid blocks in order to address -log block thrashing problem in the greedy algorithm. F2FS adopts the greedy -algorithm for on-demand cleaner, while background cleaner adopts cost-benefit -algorithm. - -In order to identify whether the data in the victim segment are valid or not, -F2FS manages a bitmap. Each bit represents the validity of a block, and the -bitmap is composed of a bit stream covering whole blocks in main area. - -Write-hint Policy ------------------ - -1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET. - -2) whint_mode=user-based. F2FS tries to pass down hints given by -users. - -User F2FS Block ----- ---- ----- - META WRITE_LIFE_NOT_SET - HOT_NODE " - WARM_NODE " - COLD_NODE " -*ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME -*extension list " " - --- buffered io -WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME -WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT -WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET -WRITE_LIFE_NONE " " -WRITE_LIFE_MEDIUM " " -WRITE_LIFE_LONG " " - --- direct io -WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME -WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT -WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET -WRITE_LIFE_NONE " WRITE_LIFE_NONE -WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM -WRITE_LIFE_LONG " WRITE_LIFE_LONG - -3) whint_mode=fs-based. F2FS passes down hints with its policy. - -User F2FS Block ----- ---- ----- - META WRITE_LIFE_MEDIUM; - HOT_NODE WRITE_LIFE_NOT_SET - WARM_NODE " - COLD_NODE WRITE_LIFE_NONE -ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME -extension list " " - --- buffered io -WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME -WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT -WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG -WRITE_LIFE_NONE " " -WRITE_LIFE_MEDIUM " " -WRITE_LIFE_LONG " " - --- direct io -WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME -WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT -WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET -WRITE_LIFE_NONE " WRITE_LIFE_NONE -WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM -WRITE_LIFE_LONG " WRITE_LIFE_LONG - -Fallocate(2) Policy -------------------- - -The default policy follows the below posix rule. - -Allocating disk space - The default operation (i.e., mode is zero) of fallocate() allocates - the disk space within the range specified by offset and len. The - file size (as reported by stat(2)) will be changed if offset+len is - greater than the file size. Any subregion within the range specified - by offset and len that did not contain data before the call will be - initialized to zero. This default behavior closely resembles the - behavior of the posix_fallocate(3) library function, and is intended - as a method of optimally implementing that function. - -However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to -fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addressess having -zero or random data, which is useful to the below scenario where: - 1. create(fd) - 2. ioctl(fd, F2FS_IOC_SET_PIN_FILE) - 3. fallocate(fd, 0, 0, size) - 4. address = fibmap(fd, offset) - 5. open(blkdev) - 6. write(blkdev, address) - -Compression implementation --------------------------- - -- New term named cluster is defined as basic unit of compression, file can -be divided into multiple clusters logically. One cluster includes 4 << n -(n >= 0) logical pages, compression size is also cluster size, each of -cluster can be compressed or not. - -- In cluster metadata layout, one special block address is used to indicate -cluster is compressed one or normal one, for compressed cluster, following -metadata maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs -stores data including compress header and compressed data. - -- In order to eliminate write amplification during overwrite, F2FS only -support compression on write-once file, data can be compressed only when -all logical blocks in file are valid and cluster compress ratio is lower -than specified threshold. - -- To enable compression on regular inode, there are three ways: -* chattr +c file -* chattr +c dir; touch dir/file -* mount w/ -o compress_extension=ext; touch file.ext - -Compress metadata layout: - [Dnode Structure] - +-----------------------------------------------+ - | cluster 1 | cluster 2 | ......... | cluster N | - +-----------------------------------------------+ - . . . . - . . . . - . Compressed Cluster . . Normal Cluster . -+----------+---------+---------+---------+ +---------+---------+---------+---------+ -|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 | -+----------+---------+---------+---------+ +---------+---------+---------+---------+ - . . - . . - . . - +-------------+-------------+----------+----------------------------+ - | data length | data chksum | reserved | compressed data | - +-------------+-------------+----------+----------------------------+ diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index aa2c3d1de3de..f69d20406be0 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -64,6 +64,7 @@ Documentation for filesystem implementations. erofs ext2 ext3 + f2fs fuse overlayfs virtiofs -- cgit v1.2.3 From 720c2fc1ec7cb36bfc5326603522bc3955534773 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:05 +0100 Subject: docs: filesystems: convert gfs2.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: Bob Peterson Link: https://lore.kernel.org/r/6d7a296de025bcfed7a229da7f8cc1678944f304.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/gfs2.rst | 53 +++++++++++++++++++++++++++++++++++++ Documentation/filesystems/gfs2.txt | 45 ------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 54 insertions(+), 45 deletions(-) create mode 100644 Documentation/filesystems/gfs2.rst delete mode 100644 Documentation/filesystems/gfs2.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/gfs2.rst b/Documentation/filesystems/gfs2.rst new file mode 100644 index 000000000000..8d1ab589ce18 --- /dev/null +++ b/Documentation/filesystems/gfs2.rst @@ -0,0 +1,53 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================== +Global File System +================== + +https://fedorahosted.org/cluster/wiki/HomePage + +GFS is a cluster file system. It allows a cluster of computers to +simultaneously use a block device that is shared between them (with FC, +iSCSI, NBD, etc). GFS reads and writes to the block device like a local +file system, but also uses a lock module to allow the computers coordinate +their I/O so file system consistency is maintained. One of the nifty +features of GFS is perfect consistency -- changes made to the file system +on one machine show up immediately on all other machines in the cluster. + +GFS uses interchangeable inter-node locking mechanisms, the currently +supported mechanisms are: + + lock_nolock + - allows gfs to be used as a local file system + + lock_dlm + - uses a distributed lock manager (dlm) for inter-node locking. + The dlm is found at linux/fs/dlm/ + +Lock_dlm depends on user space cluster management systems found +at the URL above. + +To use gfs as a local file system, no external clustering systems are +needed, simply:: + + $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device + $ mount -t gfs2 /dev/block_device /dir + +If you are using Fedora, you need to install the gfs2-utils package +and, for lock_dlm, you will also need to install the cman package +and write a cluster.conf as per the documentation. For F17 and above +cman has been replaced by the dlm package. + +GFS2 is not on-disk compatible with previous versions of GFS, but it +is pretty close. + +The following man pages can be found at the URL above: + + ============ ============================================= + fsck.gfs2 to repair a filesystem + gfs2_grow to expand a filesystem online + gfs2_jadd to add journals to a filesystem online + tunegfs2 to manipulate, examine and tune a filesystem + gfs2_convert to convert a gfs filesystem to gfs2 in-place + mkfs.gfs2 to make a filesystem + ============ ============================================= diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.txt deleted file mode 100644 index cc4f2306609e..000000000000 --- a/Documentation/filesystems/gfs2.txt +++ /dev/null @@ -1,45 +0,0 @@ -Global File System ------------------- - -https://fedorahosted.org/cluster/wiki/HomePage - -GFS is a cluster file system. It allows a cluster of computers to -simultaneously use a block device that is shared between them (with FC, -iSCSI, NBD, etc). GFS reads and writes to the block device like a local -file system, but also uses a lock module to allow the computers coordinate -their I/O so file system consistency is maintained. One of the nifty -features of GFS is perfect consistency -- changes made to the file system -on one machine show up immediately on all other machines in the cluster. - -GFS uses interchangeable inter-node locking mechanisms, the currently -supported mechanisms are: - - lock_nolock -- allows gfs to be used as a local file system - - lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking - The dlm is found at linux/fs/dlm/ - -Lock_dlm depends on user space cluster management systems found -at the URL above. - -To use gfs as a local file system, no external clustering systems are -needed, simply: - - $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device - $ mount -t gfs2 /dev/block_device /dir - -If you are using Fedora, you need to install the gfs2-utils package -and, for lock_dlm, you will also need to install the cman package -and write a cluster.conf as per the documentation. For F17 and above -cman has been replaced by the dlm package. - -GFS2 is not on-disk compatible with previous versions of GFS, but it -is pretty close. - -The following man pages can be found at the URL above: - fsck.gfs2 to repair a filesystem - gfs2_grow to expand a filesystem online - gfs2_jadd to add journals to a filesystem online - tunegfs2 to manipulate, examine and tune a filesystem - gfs2_convert to convert a gfs filesystem to gfs2 in-place - mkfs.gfs2 to make a filesystem diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index f69d20406be0..f24befe78326 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -65,6 +65,7 @@ Documentation for filesystem implementations. ext2 ext3 f2fs + gfs2 fuse overlayfs virtiofs -- cgit v1.2.3 From 5b7ac27a6e2c54cc09f479b616f1076afeae3c1b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:06 +0100 Subject: docs: filesystems: convert gfs2-uevents.txt to ReST This document is almost in ReST format: all it needs is to have the titles adjusted and add a SPDX header. In other words: - Add a SPDX header; - Add a document title; - Adjust section titles; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: Bob Peterson Link: https://lore.kernel.org/r/1d1c46b7e86bd0a18d9abbea0de0bc2be84e5e2b.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/gfs2-uevents.rst | 112 +++++++++++++++++++++++++++++ Documentation/filesystems/gfs2-uevents.txt | 100 -------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 113 insertions(+), 100 deletions(-) create mode 100644 Documentation/filesystems/gfs2-uevents.rst delete mode 100644 Documentation/filesystems/gfs2-uevents.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/gfs2-uevents.rst b/Documentation/filesystems/gfs2-uevents.rst new file mode 100644 index 000000000000..f162a2c76c69 --- /dev/null +++ b/Documentation/filesystems/gfs2-uevents.rst @@ -0,0 +1,112 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================ +uevents and GFS2 +================ + +During the lifetime of a GFS2 mount, a number of uevents are generated. +This document explains what the events are and what they are used +for (by gfs_controld in gfs2-utils). + +A list of GFS2 uevents +====================== + +1. ADD +------ + +The ADD event occurs at mount time. It will always be the first +uevent generated by the newly created filesystem. If the mount +is successful, an ONLINE uevent will follow. If it is not successful +then a REMOVE uevent will follow. + +The ADD uevent has two environment variables: SPECTATOR=[0|1] +and RDONLY=[0|1] that specify the spectator status (a read-only mount +with no journal assigned), and read-only (with journal assigned) status +of the filesystem respectively. + +2. ONLINE +--------- + +The ONLINE uevent is generated after a successful mount or remount. It +has the same environment variables as the ADD uevent. The ONLINE +uevent, along with the two environment variables for spectator and +RDONLY are a relatively recent addition (2.6.32-rc+) and will not +be generated by older kernels. + +3. CHANGE +--------- + +The CHANGE uevent is used in two places. One is when reporting the +successful mount of the filesystem by the first node (FIRSTMOUNT=Done). +This is used as a signal by gfs_controld that it is then ok for other +nodes in the cluster to mount the filesystem. + +The other CHANGE uevent is used to inform of the completion +of journal recovery for one of the filesystems journals. It has +two environment variables, JID= which specifies the journal id which +has just been recovered, and RECOVERY=[Done|Failed] to indicate the +success (or otherwise) of the operation. These uevents are generated +for every journal recovered, whether it is during the initial mount +process or as the result of gfs_controld requesting a specific journal +recovery via the /sys/fs/gfs2//lock_module/recovery file. + +Because the CHANGE uevent was used (in early versions of gfs_controld) +without checking the environment variables to discover the state, we +cannot add any more functions to it without running the risk of +someone using an older version of the user tools and breaking their +cluster. For this reason the ONLINE uevent was used when adding a new +uevent for a successful mount or remount. + +4. OFFLINE +---------- + +The OFFLINE uevent is only generated due to filesystem errors and is used +as part of the "withdraw" mechanism. Currently this doesn't give any +information about what the error is, which is something that needs to +be fixed. + +5. REMOVE +--------- + +The REMOVE uevent is generated at the end of an unsuccessful mount +or at the end of a umount of the filesystem. All REMOVE uevents will +have been preceded by at least an ADD uevent for the same filesystem, +and unlike the other uevents is generated automatically by the kernel's +kobject subsystem. + + +Information common to all GFS2 uevents (uevent environment variables) +===================================================================== + +1. LOCKTABLE= +-------------- + +The LOCKTABLE is a string, as supplied on the mount command +line (locktable=) or via fstab. It is used as a filesystem label +as well as providing the information for a lock_dlm mount to be +able to join the cluster. + +2. LOCKPROTO= +------------- + +The LOCKPROTO is a string, and its value depends on what is set +on the mount command line, or via fstab. It will be either +lock_nolock or lock_dlm. In the future other lock managers +may be supported. + +3. JOURNALID= +------------- + +If a journal is in use by the filesystem (journals are not +assigned for spectator mounts) then this will give the +numeric journal id in all GFS2 uevents. + +4. UUID= +-------- + +With recent versions of gfs2-utils, mkfs.gfs2 writes a UUID +into the filesystem superblock. If it exists, this will +be included in every uevent relating to the filesystem. + + + diff --git a/Documentation/filesystems/gfs2-uevents.txt b/Documentation/filesystems/gfs2-uevents.txt deleted file mode 100644 index 19a19ebebc34..000000000000 --- a/Documentation/filesystems/gfs2-uevents.txt +++ /dev/null @@ -1,100 +0,0 @@ - uevents and GFS2 - ================== - -During the lifetime of a GFS2 mount, a number of uevents are generated. -This document explains what the events are and what they are used -for (by gfs_controld in gfs2-utils). - -A list of GFS2 uevents ------------------------ - -1. ADD - -The ADD event occurs at mount time. It will always be the first -uevent generated by the newly created filesystem. If the mount -is successful, an ONLINE uevent will follow. If it is not successful -then a REMOVE uevent will follow. - -The ADD uevent has two environment variables: SPECTATOR=[0|1] -and RDONLY=[0|1] that specify the spectator status (a read-only mount -with no journal assigned), and read-only (with journal assigned) status -of the filesystem respectively. - -2. ONLINE - -The ONLINE uevent is generated after a successful mount or remount. It -has the same environment variables as the ADD uevent. The ONLINE -uevent, along with the two environment variables for spectator and -RDONLY are a relatively recent addition (2.6.32-rc+) and will not -be generated by older kernels. - -3. CHANGE - -The CHANGE uevent is used in two places. One is when reporting the -successful mount of the filesystem by the first node (FIRSTMOUNT=Done). -This is used as a signal by gfs_controld that it is then ok for other -nodes in the cluster to mount the filesystem. - -The other CHANGE uevent is used to inform of the completion -of journal recovery for one of the filesystems journals. It has -two environment variables, JID= which specifies the journal id which -has just been recovered, and RECOVERY=[Done|Failed] to indicate the -success (or otherwise) of the operation. These uevents are generated -for every journal recovered, whether it is during the initial mount -process or as the result of gfs_controld requesting a specific journal -recovery via the /sys/fs/gfs2//lock_module/recovery file. - -Because the CHANGE uevent was used (in early versions of gfs_controld) -without checking the environment variables to discover the state, we -cannot add any more functions to it without running the risk of -someone using an older version of the user tools and breaking their -cluster. For this reason the ONLINE uevent was used when adding a new -uevent for a successful mount or remount. - -4. OFFLINE - -The OFFLINE uevent is only generated due to filesystem errors and is used -as part of the "withdraw" mechanism. Currently this doesn't give any -information about what the error is, which is something that needs to -be fixed. - -5. REMOVE - -The REMOVE uevent is generated at the end of an unsuccessful mount -or at the end of a umount of the filesystem. All REMOVE uevents will -have been preceded by at least an ADD uevent for the same filesystem, -and unlike the other uevents is generated automatically by the kernel's -kobject subsystem. - - -Information common to all GFS2 uevents (uevent environment variables) ----------------------------------------------------------------------- - -1. LOCKTABLE= - -The LOCKTABLE is a string, as supplied on the mount command -line (locktable=) or via fstab. It is used as a filesystem label -as well as providing the information for a lock_dlm mount to be -able to join the cluster. - -2. LOCKPROTO= - -The LOCKPROTO is a string, and its value depends on what is set -on the mount command line, or via fstab. It will be either -lock_nolock or lock_dlm. In the future other lock managers -may be supported. - -3. JOURNALID= - -If a journal is in use by the filesystem (journals are not -assigned for spectator mounts) then this will give the -numeric journal id in all GFS2 uevents. - -4. UUID= - -With recent versions of gfs2-utils, mkfs.gfs2 writes a UUID -into the filesystem superblock. If it exists, this will -be included in every uevent relating to the filesystem. - - - diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index f24befe78326..c16e517e37c5 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -66,6 +66,7 @@ Documentation for filesystem implementations. ext3 f2fs gfs2 + gfs2-uevents fuse overlayfs virtiofs -- cgit v1.2.3 From cdded7db3625c98e66316911947bd3a1941992e2 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:07 +0100 Subject: docs: filesystems: convert hfsplus.txt to ReST Just trivial changes: - Add a SPDX header; - Add it to filesystems/index.rst. While here, adjust document title, just to make it use the same style of the other docs. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/4298409da951fbee000201a6c8d9c85e961b2b79.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/hfsplus.rst | 61 +++++++++++++++++++++++++++++++++++ Documentation/filesystems/hfsplus.txt | 59 --------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 62 insertions(+), 59 deletions(-) create mode 100644 Documentation/filesystems/hfsplus.rst delete mode 100644 Documentation/filesystems/hfsplus.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/hfsplus.rst b/Documentation/filesystems/hfsplus.rst new file mode 100644 index 000000000000..f02f4f5fc020 --- /dev/null +++ b/Documentation/filesystems/hfsplus.rst @@ -0,0 +1,61 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====================================== +Macintosh HFSPlus Filesystem for Linux +====================================== + +HFSPlus is a filesystem first introduced in MacOS 8.1. +HFSPlus has several extensions to HFS, including 32-bit allocation +blocks, 255-character unicode filenames, and file sizes of 2^63 bytes. + + +Mount options +============= + +When mounting an HFSPlus filesystem, the following options are accepted: + + creator=cccc, type=cccc + Specifies the creator/type values as shown by the MacOS finder + used for creating new files. Default values: '????'. + + uid=n, gid=n + Specifies the user/group that owns all files on the filesystem + that have uninitialized permissions structures. + Default: user/group id of the mounting process. + + umask=n + Specifies the umask (in octal) used for files and directories + that have uninitialized permissions structures. + Default: umask of the mounting process. + + session=n + Select the CDROM session to mount as HFSPlus filesystem. Defaults to + leaving that decision to the CDROM driver. This option will fail + with anything but a CDROM as underlying devices. + + part=n + Select partition number n from the devices. This option only makes + sense for CDROMs because they can't be partitioned under Linux. + For disk devices the generic partition parsing code does this + for us. Defaults to not parsing the partition table at all. + + decompose + Decompose file name characters. + + nodecompose + Do not decompose file name characters. + + force + Used to force write access to volumes that are marked as journalled + or locked. Use at your own risk. + + nls=cccc + Encoding to use when presenting file names. + + +References +========== + +kernel source: + +Apple Technote 1150 https://developer.apple.com/legacy/library/technotes/tn/tn1150.html diff --git a/Documentation/filesystems/hfsplus.txt b/Documentation/filesystems/hfsplus.txt deleted file mode 100644 index 59f7569fc9ed..000000000000 --- a/Documentation/filesystems/hfsplus.txt +++ /dev/null @@ -1,59 +0,0 @@ - -Macintosh HFSPlus Filesystem for Linux -====================================== - -HFSPlus is a filesystem first introduced in MacOS 8.1. -HFSPlus has several extensions to HFS, including 32-bit allocation -blocks, 255-character unicode filenames, and file sizes of 2^63 bytes. - - -Mount options -============= - -When mounting an HFSPlus filesystem, the following options are accepted: - - creator=cccc, type=cccc - Specifies the creator/type values as shown by the MacOS finder - used for creating new files. Default values: '????'. - - uid=n, gid=n - Specifies the user/group that owns all files on the filesystem - that have uninitialized permissions structures. - Default: user/group id of the mounting process. - - umask=n - Specifies the umask (in octal) used for files and directories - that have uninitialized permissions structures. - Default: umask of the mounting process. - - session=n - Select the CDROM session to mount as HFSPlus filesystem. Defaults to - leaving that decision to the CDROM driver. This option will fail - with anything but a CDROM as underlying devices. - - part=n - Select partition number n from the devices. This option only makes - sense for CDROMs because they can't be partitioned under Linux. - For disk devices the generic partition parsing code does this - for us. Defaults to not parsing the partition table at all. - - decompose - Decompose file name characters. - - nodecompose - Do not decompose file name characters. - - force - Used to force write access to volumes that are marked as journalled - or locked. Use at your own risk. - - nls=cccc - Encoding to use when presenting file names. - - -References -========== - -kernel source: - -Apple Technote 1150 https://developer.apple.com/legacy/library/technotes/tn/tn1150.html diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index c16e517e37c5..c351bc8a8c85 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -67,6 +67,7 @@ Documentation for filesystem implementations. f2fs gfs2 gfs2-uevents + hfsplus fuse overlayfs virtiofs -- cgit v1.2.3 From 5040a0acc8f2300ef35a1d9cc1c50a25235e061d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:08 +0100 Subject: docs: filesystems: convert hfs.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Use notes markups; - Add lists markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/8a625d6652d88809730020048d26c3b9333ddbdf.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/hfs.rst | 87 +++++++++++++++++++++++++++++++++++++ Documentation/filesystems/hfs.txt | 82 ---------------------------------- Documentation/filesystems/index.rst | 1 + 3 files changed, 88 insertions(+), 82 deletions(-) create mode 100644 Documentation/filesystems/hfs.rst delete mode 100644 Documentation/filesystems/hfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/hfs.rst b/Documentation/filesystems/hfs.rst new file mode 100644 index 000000000000..ab17a005e9b1 --- /dev/null +++ b/Documentation/filesystems/hfs.rst @@ -0,0 +1,87 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================== +Macintosh HFS Filesystem for Linux +================================== + + +.. Note:: This filesystem doesn't have a maintainer. + + +HFS stands for ``Hierarchical File System`` and is the filesystem used +by the Mac Plus and all later Macintosh models. Earlier Macintosh +models used MFS (``Macintosh File System``), which is not supported, +MacOS 8.1 and newer support a filesystem called HFS+ that's similar to +HFS but is extended in various areas. Use the hfsplus filesystem driver +to access such filesystems from Linux. + + +Mount options +============= + +When mounting an HFS filesystem, the following options are accepted: + + creator=cccc, type=cccc + Specifies the creator/type values as shown by the MacOS finder + used for creating new files. Default values: '????'. + + uid=n, gid=n + Specifies the user/group that owns all files on the filesystems. + Default: user/group id of the mounting process. + + dir_umask=n, file_umask=n, umask=n + Specifies the umask used for all files , all directories or all + files and directories. Defaults to the umask of the mounting process. + + session=n + Select the CDROM session to mount as HFS filesystem. Defaults to + leaving that decision to the CDROM driver. This option will fail + with anything but a CDROM as underlying devices. + + part=n + Select partition number n from the devices. Does only makes + sense for CDROMS because they can't be partitioned under Linux. + For disk devices the generic partition parsing code does this + for us. Defaults to not parsing the partition table at all. + + quiet + Ignore invalid mount options instead of complaining. + + +Writing to HFS Filesystems +========================== + +HFS is not a UNIX filesystem, thus it does not have the usual features you'd +expect: + + * You can't modify the set-uid, set-gid, sticky or executable bits or the uid + and gid of files. + * You can't create hard- or symlinks, device files, sockets or FIFOs. + +HFS does on the other have the concepts of multiple forks per file. These +non-standard forks are represented as hidden additional files in the normal +filesystems namespace which is kind of a cludge and makes the semantics for +the a little strange: + + * You can't create, delete or rename resource forks of files or the + Finder's metadata. + * They are however created (with default values), deleted and renamed + along with the corresponding data fork or directory. + * Copying files to a different filesystem will loose those attributes + that are essential for MacOS to work. + + +Creating HFS filesystems +======================== + +The hfsutils package from Robert Leslie contains a program called +hformat that can be used to create HFS filesystem. See + for details. + + +Credits +======= + +The HFS drivers was written by Paul H. Hargrovea (hargrove@sccm.Stanford.EDU). +Roman Zippel (roman@ardistech.com) rewrote large parts of the code and brought +in btree routines derived from Brad Boyer's hfsplus driver. diff --git a/Documentation/filesystems/hfs.txt b/Documentation/filesystems/hfs.txt deleted file mode 100644 index d096df6db07a..000000000000 --- a/Documentation/filesystems/hfs.txt +++ /dev/null @@ -1,82 +0,0 @@ -Note: This filesystem doesn't have a maintainer. - -Macintosh HFS Filesystem for Linux -================================== - -HFS stands for ``Hierarchical File System'' and is the filesystem used -by the Mac Plus and all later Macintosh models. Earlier Macintosh -models used MFS (``Macintosh File System''), which is not supported, -MacOS 8.1 and newer support a filesystem called HFS+ that's similar to -HFS but is extended in various areas. Use the hfsplus filesystem driver -to access such filesystems from Linux. - - -Mount options -============= - -When mounting an HFS filesystem, the following options are accepted: - - creator=cccc, type=cccc - Specifies the creator/type values as shown by the MacOS finder - used for creating new files. Default values: '????'. - - uid=n, gid=n - Specifies the user/group that owns all files on the filesystems. - Default: user/group id of the mounting process. - - dir_umask=n, file_umask=n, umask=n - Specifies the umask used for all files , all directories or all - files and directories. Defaults to the umask of the mounting process. - - session=n - Select the CDROM session to mount as HFS filesystem. Defaults to - leaving that decision to the CDROM driver. This option will fail - with anything but a CDROM as underlying devices. - - part=n - Select partition number n from the devices. Does only makes - sense for CDROMS because they can't be partitioned under Linux. - For disk devices the generic partition parsing code does this - for us. Defaults to not parsing the partition table at all. - - quiet - Ignore invalid mount options instead of complaining. - - -Writing to HFS Filesystems -========================== - -HFS is not a UNIX filesystem, thus it does not have the usual features you'd -expect: - - o You can't modify the set-uid, set-gid, sticky or executable bits or the uid - and gid of files. - o You can't create hard- or symlinks, device files, sockets or FIFOs. - -HFS does on the other have the concepts of multiple forks per file. These -non-standard forks are represented as hidden additional files in the normal -filesystems namespace which is kind of a cludge and makes the semantics for -the a little strange: - - o You can't create, delete or rename resource forks of files or the - Finder's metadata. - o They are however created (with default values), deleted and renamed - along with the corresponding data fork or directory. - o Copying files to a different filesystem will loose those attributes - that are essential for MacOS to work. - - -Creating HFS filesystems -=================================== - -The hfsutils package from Robert Leslie contains a program called -hformat that can be used to create HFS filesystem. See - for details. - - -Credits -======= - -The HFS drivers was written by Paul H. Hargrovea (hargrove@sccm.Stanford.EDU). -Roman Zippel (roman@ardistech.com) rewrote large parts of the code and brought -in btree routines derived from Brad Boyer's hfsplus driver. diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index c351bc8a8c85..f776411340cb 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -67,6 +67,7 @@ Documentation for filesystem implementations. f2fs gfs2 gfs2-uevents + hfs hfsplus fuse overlayfs -- cgit v1.2.3 From a1ef4bcd1664a9c1ae5191598b769ab37b93aa57 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:09 +0100 Subject: docs: filesystems: convert hpfs.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/581019c3120938118aa55ba28902b62083c3f37a.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/hpfs.rst | 353 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/hpfs.txt | 296 ------------------------------ Documentation/filesystems/index.rst | 1 + 3 files changed, 354 insertions(+), 296 deletions(-) create mode 100644 Documentation/filesystems/hpfs.rst delete mode 100644 Documentation/filesystems/hpfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/hpfs.rst b/Documentation/filesystems/hpfs.rst new file mode 100644 index 000000000000..0db152278572 --- /dev/null +++ b/Documentation/filesystems/hpfs.rst @@ -0,0 +1,353 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================== +Read/Write HPFS 2.09 +==================== + +1998-2004, Mikulas Patocka + +:email: mikulas@artax.karlin.mff.cuni.cz +:homepage: http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi + +Credits +======= +Chris Smith, 1993, original read-only HPFS, some code and hpfs structures file + is taken from it + +Jacques Gelinas, MSDos mmap, Inspired by fs/nfs/mmap.c (Jon Tombs 15 Aug 1993) + +Werner Almesberger, 1992, 1993, MSDos option parser & CR/LF conversion + +Mount options + +uid=xxx,gid=xxx,umask=xxx (default uid=gid=0 umask=default_system_umask) + Set owner/group/mode for files that do not have it specified in extended + attributes. Mode is inverted umask - for example umask 027 gives owner + all permission, group read permission and anybody else no access. Note + that for files mode is anded with 0666. If you want files to have 'x' + rights, you must use extended attributes. +case=lower,asis (default asis) + File name lowercasing in readdir. +conv=binary,text,auto (default binary) + CR/LF -> LF conversion, if auto, decision is made according to extension + - there is a list of text extensions (I thing it's better to not convert + text file than to damage binary file). If you want to change that list, + change it in the source. Original readonly HPFS contained some strange + heuristic algorithm that I removed. I thing it's danger to let the + computer decide whether file is text or binary. For example, DJGPP + binaries contain small text message at the beginning and they could be + misidentified and damaged under some circumstances. +check=none,normal,strict (default normal) + Check level. Selecting none will cause only little speedup and big + danger. I tried to write it so that it won't crash if check=normal on + corrupted filesystems. check=strict means many superfluous checks - + used for debugging (for example it checks if file is allocated in + bitmaps when accessing it). +errors=continue,remount-ro,panic (default remount-ro) + Behaviour when filesystem errors found. +chkdsk=no,errors,always (default errors) + When to mark filesystem dirty so that OS/2 checks it. +eas=no,ro,rw (default rw) + What to do with extended attributes. 'no' - ignore them and use always + values specified in uid/gid/mode options. 'ro' - read extended + attributes but do not create them. 'rw' - create extended attributes + when you use chmod/chown/chgrp/mknod/ln -s on the filesystem. +timeshift=(-)nnn (default 0) + Shifts the time by nnn seconds. For example, if you see under linux + one hour more, than under os/2, use timeshift=-3600. + + +File names +========== + +As in OS/2, filenames are case insensitive. However, shell thinks that names +are case sensitive, so for example when you create a file FOO, you can use +'cat FOO', 'cat Foo', 'cat foo' or 'cat F*' but not 'cat f*'. Note, that you +also won't be able to compile linux kernel (and maybe other things) on HPFS +because kernel creates different files with names like bootsect.S and +bootsect.s. When searching for file thats name has characters >= 128, codepages +are used - see below. +OS/2 ignores dots and spaces at the end of file name, so this driver does as +well. If you create 'a. ...', the file 'a' will be created, but you can still +access it under names 'a.', 'a..', 'a . . . ' etc. + + +Extended attributes +=================== + +On HPFS partitions, OS/2 can associate to each file a special information called +extended attributes. Extended attributes are pairs of (key,value) where key is +an ascii string identifying that attribute and value is any string of bytes of +variable length. OS/2 stores window and icon positions and file types there. So +why not use it for unix-specific info like file owner or access rights? This +driver can do it. If you chown/chgrp/chmod on a hpfs partition, extended +attributes with keys "UID", "GID" or "MODE" and 2-byte values are created. Only +that extended attributes those value differs from defaults specified in mount +options are created. Once created, the extended attributes are never deleted, +they're just changed. It means that when your default uid=0 and you type +something like 'chown luser file; chown root file' the file will contain +extended attribute UID=0. And when you umount the fs and mount it again with +uid=luser_uid, the file will be still owned by root! If you chmod file to 444, +extended attribute "MODE" will not be set, this special case is done by setting +read-only flag. When you mknod a block or char device, besides "MODE", the +special 4-byte extended attribute "DEV" will be created containing the device +number. Currently this driver cannot resize extended attributes - it means +that if somebody (I don't know who?) has set "UID", "GID", "MODE" or "DEV" +attributes with different sizes, they won't be rewritten and changing these +values doesn't work. + + +Symlinks +======== + +You can do symlinks on HPFS partition, symlinks are achieved by setting extended +attribute named "SYMLINK" with symlink value. Like on ext2, you can chown and +chgrp symlinks but I don't know what is it good for. chmoding symlink results +in chmoding file where symlink points. These symlinks are just for Linux use and +incompatible with OS/2. OS/2 PmShell symlinks are not supported because they are +stored in very crazy way. They tried to do it so that link changes when file is +moved ... sometimes it works. But the link is partly stored in directory +extended attributes and partly in OS2SYS.INI. I don't want (and don't know how) +to analyze or change OS2SYS.INI. + + +Codepages +========= + +HPFS can contain several uppercasing tables for several codepages and each +file has a pointer to codepage its name is in. However OS/2 was created in +America where people don't care much about codepages and so multiple codepages +support is quite buggy. I have Czech OS/2 working in codepage 852 on my disk. +Once I booted English OS/2 working in cp 850 and I created a file on my 852 +partition. It marked file name codepage as 850 - good. But when I again booted +Czech OS/2, the file was completely inaccessible under any name. It seems that +OS/2 uppercases the search pattern with its system code page (852) and file +name it's comparing to with its code page (850). These could never match. Is it +really what IBM developers wanted? But problems continued. When I created in +Czech OS/2 another file in that directory, that file was inaccessible too. OS/2 +probably uses different uppercasing method when searching where to place a file +(note, that files in HPFS directory must be sorted) and when searching for +a file. Finally when I opened this directory in PmShell, PmShell crashed (the +funny thing was that, when rebooted, PmShell tried to reopen this directory +again :-). chkdsk happily ignores these errors and only low-level disk +modification saved me. Never mix different language versions of OS/2 on one +system although HPFS was designed to allow that. +OK, I could implement complex codepage support to this driver but I think it +would cause more problems than benefit with such buggy implementation in OS/2. +So this driver simply uses first codepage it finds for uppercasing and +lowercasing no matter what's file codepage index. Usually all file names are in +this codepage - if you don't try to do what I described above :-) + + +Known bugs +========== + +HPFS386 on OS/2 server is not supported. HPFS386 installed on normal OS/2 client +should work. If you have OS/2 server, use only read-only mode. I don't know how +to handle some HPFS386 structures like access control list or extended perm +list, I don't know how to delete them when file is deleted and how to not +overwrite them with extended attributes. Send me some info on these structures +and I'll make it. However, this driver should detect presence of HPFS386 +structures, remount read-only and not destroy them (I hope). + +When there's not enough space for extended attributes, they will be truncated +and no error is returned. + +OS/2 can't access files if the path is longer than about 256 chars but this +driver allows you to do it. chkdsk ignores such errors. + +Sometimes you won't be able to delete some files on a very full filesystem +(returning error ENOSPC). That's because file in non-leaf node in directory tree +(one directory, if it's large, has dirents in tree on HPFS) must be replaced +with another node when deleted. And that new file might have larger name than +the old one so the new name doesn't fit in directory node (dnode). And that +would result in directory tree splitting, that takes disk space. Workaround is +to delete other files that are leaf (probability that the file is non-leaf is +about 1/50) or to truncate file first to make some space. +You encounter this problem only if you have many directories so that +preallocated directory band is full i.e.:: + + number_of_directories / size_of_filesystem_in_mb > 4. + +You can't delete open directories. + +You can't rename over directories (what is it good for?). + +Renaming files so that only case changes doesn't work. This driver supports it +but vfs doesn't. Something like 'mv file FILE' won't work. + +All atimes and directory mtimes are not updated. That's because of performance +reasons. If you extremely wish to update them, let me know, I'll write it (but +it will be slow). + +When the system is out of memory and swap, it may slightly corrupt filesystem +(lost files, unbalanced directories). (I guess all filesystem may do it). + +When compiled, you get warning: function declaration isn't a prototype. Does +anybody know what does it mean? + + +What does "unbalanced tree" message mean? +========================================= + +Old versions of this driver created sometimes unbalanced dnode trees. OS/2 +chkdsk doesn't scream if the tree is unbalanced (and sometimes creates +unbalanced trees too :-) but both HPFS and HPFS386 contain bug that it rarely +crashes when the tree is not balanced. This driver handles unbalanced trees +correctly and writes warning if it finds them. If you see this message, this is +probably because of directories created with old version of this driver. +Workaround is to move all files from that directory to another and then back +again. Do it in Linux, not OS/2! If you see this message in directory that is +whole created by this driver, it is BUG - let me know about it. + + +Bugs in OS/2 +============ + +When you have two (or more) lost directories pointing each to other, chkdsk +locks up when repairing filesystem. + +Sometimes (I think it's random) when you create a file with one-char name under +OS/2, OS/2 marks it as 'long'. chkdsk then removes this flag saying "Minor fs +error corrected". + +File names like "a .b" are marked as 'long' by OS/2 but chkdsk "corrects" it and +marks them as short (and writes "minor fs error corrected"). This bug is not in +HPFS386. + +Codepage bugs described above +============================= + +If you don't install fixpacks, there are many, many more... + + +History +======= + +====== ========================================================================= +0.90 First public release +0.91 Fixed bug that caused shooting to memory when write_inode was called on + open inode (rarely happened) +0.92 Fixed a little memory leak in freeing directory inodes +0.93 Fixed bug that locked up the machine when there were too many filenames + with first 15 characters same + Fixed write_file to zero file when writing behind file end +0.94 Fixed a little memory leak when trying to delete busy file or directory +0.95 Fixed a bug that i_hpfs_parent_dir was not updated when moving files +1.90 First version for 2.1.1xx kernels +1.91 Fixed a bug that chk_sectors failed when sectors were at the end of disk + Fixed a race-condition when write_inode is called while deleting file + Fixed a bug that could possibly happen (with very low probability) when + using 0xff in filenames. + + Rewritten locking to avoid race-conditions + + Mount option 'eas' now works + + Fsync no longer returns error + + Files beginning with '.' are marked hidden + + Remount support added + + Alloc is not so slow when filesystem becomes full + + Atimes are no more updated because it slows down operation + + Code cleanup (removed all commented debug prints) +1.92 Corrected a bug when sync was called just before closing file +1.93 Modified, so that it works with kernels >= 2.1.131, I don't know if it + works with previous versions + + Fixed a possible problem with disks > 64G (but I don't have one, so I can't + test it) + + Fixed a file overflow at 2G + + Added new option 'timeshift' + + Changed behaviour on HPFS386: It is now possible to operate on HPFS386 in + read-only mode + + Fixed a bug that slowed down alloc and prevented allocating 100% space + (this bug was not destructive) +1.94 Added workaround for one bug in Linux + + Fixed one buffer leak + + Fixed some incompatibilities with large extended attributes (but it's still + not 100% ok, I have no info on it and OS/2 doesn't want to create them) + + Rewritten allocation + + Fixed a bug with i_blocks (du sometimes didn't display correct values) + + Directories have no longer archive attribute set (some programs don't like + it) + + Fixed a bug that it set badly one flag in large anode tree (it was not + destructive) +1.95 Fixed one buffer leak, that could happen on corrupted filesystem + + Fixed one bug in allocation in 1.94 +1.96 Added workaround for one bug in OS/2 (HPFS locked up, HPFS386 reported + error sometimes when opening directories in PMSHELL) + + Fixed a possible bitmap race + + Fixed possible problem on large disks + + You can now delete open files + + Fixed a nondestructive race in rename +1.97 Support for HPFS v3 (on large partitions) + + ZFixed a bug that it didn't allow creation of files > 128M + (it should be 2G) +1.97.1 Changed names of global symbols + + Fixed a bug when chmoding or chowning root directory +1.98 Fixed a deadlock when using old_readdir + Better directory handling; workaround for "unbalanced tree" bug in OS/2 +1.99 Corrected a possible problem when there's not enough space while deleting + file + + Now it tries to truncate the file if there's not enough space when + deleting + + Removed a lot of redundant code +2.00 Fixed a bug in rename (it was there since 1.96) + Better anti-fragmentation strategy +2.01 Fixed problem with directory listing over NFS + + Directory lseek now checks for proper parameters + + Fixed race-condition in buffer code - it is in all filesystems in Linux; + when reading device (cat /dev/hda) while creating files on it, files + could be damaged +2.02 Workaround for bug in breada in Linux. breada could cause accesses beyond + end of partition +2.03 Char, block devices and pipes are correctly created + + Fixed non-crashing race in unlink (Alexander Viro) + + Now it works with Japanese version of OS/2 +2.04 Fixed error when ftruncate used to extend file +2.05 Fixed crash when got mount parameters without = + + Fixed crash when allocation of anode failed due to full disk + + Fixed some crashes when block io or inode allocation failed +2.06 Fixed some crash on corrupted disk structures + + Better allocation strategy + + Reschedule points added so that it doesn't lock CPU long time + + It should work in read-only mode on Warp Server +2.07 More fixes for Warp Server. Now it really works +2.08 Creating new files is not so slow on large disks + + An attempt to sync deleted file does not generate filesystem error +2.09 Fixed error on extremely fragmented files +====== ========================================================================= diff --git a/Documentation/filesystems/hpfs.txt b/Documentation/filesystems/hpfs.txt deleted file mode 100644 index 74630bd504fb..000000000000 --- a/Documentation/filesystems/hpfs.txt +++ /dev/null @@ -1,296 +0,0 @@ -Read/Write HPFS 2.09 -1998-2004, Mikulas Patocka - -email: mikulas@artax.karlin.mff.cuni.cz -homepage: http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi - -CREDITS: -Chris Smith, 1993, original read-only HPFS, some code and hpfs structures file - is taken from it -Jacques Gelinas, MSDos mmap, Inspired by fs/nfs/mmap.c (Jon Tombs 15 Aug 1993) -Werner Almesberger, 1992, 1993, MSDos option parser & CR/LF conversion - -Mount options - -uid=xxx,gid=xxx,umask=xxx (default uid=gid=0 umask=default_system_umask) - Set owner/group/mode for files that do not have it specified in extended - attributes. Mode is inverted umask - for example umask 027 gives owner - all permission, group read permission and anybody else no access. Note - that for files mode is anded with 0666. If you want files to have 'x' - rights, you must use extended attributes. -case=lower,asis (default asis) - File name lowercasing in readdir. -conv=binary,text,auto (default binary) - CR/LF -> LF conversion, if auto, decision is made according to extension - - there is a list of text extensions (I thing it's better to not convert - text file than to damage binary file). If you want to change that list, - change it in the source. Original readonly HPFS contained some strange - heuristic algorithm that I removed. I thing it's danger to let the - computer decide whether file is text or binary. For example, DJGPP - binaries contain small text message at the beginning and they could be - misidentified and damaged under some circumstances. -check=none,normal,strict (default normal) - Check level. Selecting none will cause only little speedup and big - danger. I tried to write it so that it won't crash if check=normal on - corrupted filesystems. check=strict means many superfluous checks - - used for debugging (for example it checks if file is allocated in - bitmaps when accessing it). -errors=continue,remount-ro,panic (default remount-ro) - Behaviour when filesystem errors found. -chkdsk=no,errors,always (default errors) - When to mark filesystem dirty so that OS/2 checks it. -eas=no,ro,rw (default rw) - What to do with extended attributes. 'no' - ignore them and use always - values specified in uid/gid/mode options. 'ro' - read extended - attributes but do not create them. 'rw' - create extended attributes - when you use chmod/chown/chgrp/mknod/ln -s on the filesystem. -timeshift=(-)nnn (default 0) - Shifts the time by nnn seconds. For example, if you see under linux - one hour more, than under os/2, use timeshift=-3600. - - -File names - -As in OS/2, filenames are case insensitive. However, shell thinks that names -are case sensitive, so for example when you create a file FOO, you can use -'cat FOO', 'cat Foo', 'cat foo' or 'cat F*' but not 'cat f*'. Note, that you -also won't be able to compile linux kernel (and maybe other things) on HPFS -because kernel creates different files with names like bootsect.S and -bootsect.s. When searching for file thats name has characters >= 128, codepages -are used - see below. -OS/2 ignores dots and spaces at the end of file name, so this driver does as -well. If you create 'a. ...', the file 'a' will be created, but you can still -access it under names 'a.', 'a..', 'a . . . ' etc. - - -Extended attributes - -On HPFS partitions, OS/2 can associate to each file a special information called -extended attributes. Extended attributes are pairs of (key,value) where key is -an ascii string identifying that attribute and value is any string of bytes of -variable length. OS/2 stores window and icon positions and file types there. So -why not use it for unix-specific info like file owner or access rights? This -driver can do it. If you chown/chgrp/chmod on a hpfs partition, extended -attributes with keys "UID", "GID" or "MODE" and 2-byte values are created. Only -that extended attributes those value differs from defaults specified in mount -options are created. Once created, the extended attributes are never deleted, -they're just changed. It means that when your default uid=0 and you type -something like 'chown luser file; chown root file' the file will contain -extended attribute UID=0. And when you umount the fs and mount it again with -uid=luser_uid, the file will be still owned by root! If you chmod file to 444, -extended attribute "MODE" will not be set, this special case is done by setting -read-only flag. When you mknod a block or char device, besides "MODE", the -special 4-byte extended attribute "DEV" will be created containing the device -number. Currently this driver cannot resize extended attributes - it means -that if somebody (I don't know who?) has set "UID", "GID", "MODE" or "DEV" -attributes with different sizes, they won't be rewritten and changing these -values doesn't work. - - -Symlinks - -You can do symlinks on HPFS partition, symlinks are achieved by setting extended -attribute named "SYMLINK" with symlink value. Like on ext2, you can chown and -chgrp symlinks but I don't know what is it good for. chmoding symlink results -in chmoding file where symlink points. These symlinks are just for Linux use and -incompatible with OS/2. OS/2 PmShell symlinks are not supported because they are -stored in very crazy way. They tried to do it so that link changes when file is -moved ... sometimes it works. But the link is partly stored in directory -extended attributes and partly in OS2SYS.INI. I don't want (and don't know how) -to analyze or change OS2SYS.INI. - - -Codepages - -HPFS can contain several uppercasing tables for several codepages and each -file has a pointer to codepage its name is in. However OS/2 was created in -America where people don't care much about codepages and so multiple codepages -support is quite buggy. I have Czech OS/2 working in codepage 852 on my disk. -Once I booted English OS/2 working in cp 850 and I created a file on my 852 -partition. It marked file name codepage as 850 - good. But when I again booted -Czech OS/2, the file was completely inaccessible under any name. It seems that -OS/2 uppercases the search pattern with its system code page (852) and file -name it's comparing to with its code page (850). These could never match. Is it -really what IBM developers wanted? But problems continued. When I created in -Czech OS/2 another file in that directory, that file was inaccessible too. OS/2 -probably uses different uppercasing method when searching where to place a file -(note, that files in HPFS directory must be sorted) and when searching for -a file. Finally when I opened this directory in PmShell, PmShell crashed (the -funny thing was that, when rebooted, PmShell tried to reopen this directory -again :-). chkdsk happily ignores these errors and only low-level disk -modification saved me. Never mix different language versions of OS/2 on one -system although HPFS was designed to allow that. -OK, I could implement complex codepage support to this driver but I think it -would cause more problems than benefit with such buggy implementation in OS/2. -So this driver simply uses first codepage it finds for uppercasing and -lowercasing no matter what's file codepage index. Usually all file names are in -this codepage - if you don't try to do what I described above :-) - - -Known bugs - -HPFS386 on OS/2 server is not supported. HPFS386 installed on normal OS/2 client -should work. If you have OS/2 server, use only read-only mode. I don't know how -to handle some HPFS386 structures like access control list or extended perm -list, I don't know how to delete them when file is deleted and how to not -overwrite them with extended attributes. Send me some info on these structures -and I'll make it. However, this driver should detect presence of HPFS386 -structures, remount read-only and not destroy them (I hope). - -When there's not enough space for extended attributes, they will be truncated -and no error is returned. - -OS/2 can't access files if the path is longer than about 256 chars but this -driver allows you to do it. chkdsk ignores such errors. - -Sometimes you won't be able to delete some files on a very full filesystem -(returning error ENOSPC). That's because file in non-leaf node in directory tree -(one directory, if it's large, has dirents in tree on HPFS) must be replaced -with another node when deleted. And that new file might have larger name than -the old one so the new name doesn't fit in directory node (dnode). And that -would result in directory tree splitting, that takes disk space. Workaround is -to delete other files that are leaf (probability that the file is non-leaf is -about 1/50) or to truncate file first to make some space. -You encounter this problem only if you have many directories so that -preallocated directory band is full i.e. - number_of_directories / size_of_filesystem_in_mb > 4. - -You can't delete open directories. - -You can't rename over directories (what is it good for?). - -Renaming files so that only case changes doesn't work. This driver supports it -but vfs doesn't. Something like 'mv file FILE' won't work. - -All atimes and directory mtimes are not updated. That's because of performance -reasons. If you extremely wish to update them, let me know, I'll write it (but -it will be slow). - -When the system is out of memory and swap, it may slightly corrupt filesystem -(lost files, unbalanced directories). (I guess all filesystem may do it). - -When compiled, you get warning: function declaration isn't a prototype. Does -anybody know what does it mean? - - -What does "unbalanced tree" message mean? - -Old versions of this driver created sometimes unbalanced dnode trees. OS/2 -chkdsk doesn't scream if the tree is unbalanced (and sometimes creates -unbalanced trees too :-) but both HPFS and HPFS386 contain bug that it rarely -crashes when the tree is not balanced. This driver handles unbalanced trees -correctly and writes warning if it finds them. If you see this message, this is -probably because of directories created with old version of this driver. -Workaround is to move all files from that directory to another and then back -again. Do it in Linux, not OS/2! If you see this message in directory that is -whole created by this driver, it is BUG - let me know about it. - - -Bugs in OS/2 - -When you have two (or more) lost directories pointing each to other, chkdsk -locks up when repairing filesystem. - -Sometimes (I think it's random) when you create a file with one-char name under -OS/2, OS/2 marks it as 'long'. chkdsk then removes this flag saying "Minor fs -error corrected". - -File names like "a .b" are marked as 'long' by OS/2 but chkdsk "corrects" it and -marks them as short (and writes "minor fs error corrected"). This bug is not in -HPFS386. - -Codepage bugs described above. - -If you don't install fixpacks, there are many, many more... - - -History - -0.90 First public release -0.91 Fixed bug that caused shooting to memory when write_inode was called on - open inode (rarely happened) -0.92 Fixed a little memory leak in freeing directory inodes -0.93 Fixed bug that locked up the machine when there were too many filenames - with first 15 characters same - Fixed write_file to zero file when writing behind file end -0.94 Fixed a little memory leak when trying to delete busy file or directory -0.95 Fixed a bug that i_hpfs_parent_dir was not updated when moving files -1.90 First version for 2.1.1xx kernels -1.91 Fixed a bug that chk_sectors failed when sectors were at the end of disk - Fixed a race-condition when write_inode is called while deleting file - Fixed a bug that could possibly happen (with very low probability) when - using 0xff in filenames - Rewritten locking to avoid race-conditions - Mount option 'eas' now works - Fsync no longer returns error - Files beginning with '.' are marked hidden - Remount support added - Alloc is not so slow when filesystem becomes full - Atimes are no more updated because it slows down operation - Code cleanup (removed all commented debug prints) -1.92 Corrected a bug when sync was called just before closing file -1.93 Modified, so that it works with kernels >= 2.1.131, I don't know if it - works with previous versions - Fixed a possible problem with disks > 64G (but I don't have one, so I can't - test it) - Fixed a file overflow at 2G - Added new option 'timeshift' - Changed behaviour on HPFS386: It is now possible to operate on HPFS386 in - read-only mode - Fixed a bug that slowed down alloc and prevented allocating 100% space - (this bug was not destructive) -1.94 Added workaround for one bug in Linux - Fixed one buffer leak - Fixed some incompatibilities with large extended attributes (but it's still - not 100% ok, I have no info on it and OS/2 doesn't want to create them) - Rewritten allocation - Fixed a bug with i_blocks (du sometimes didn't display correct values) - Directories have no longer archive attribute set (some programs don't like - it) - Fixed a bug that it set badly one flag in large anode tree (it was not - destructive) -1.95 Fixed one buffer leak, that could happen on corrupted filesystem - Fixed one bug in allocation in 1.94 -1.96 Added workaround for one bug in OS/2 (HPFS locked up, HPFS386 reported - error sometimes when opening directories in PMSHELL) - Fixed a possible bitmap race - Fixed possible problem on large disks - You can now delete open files - Fixed a nondestructive race in rename -1.97 Support for HPFS v3 (on large partitions) - Fixed a bug that it didn't allow creation of files > 128M (it should be 2G) -1.97.1 Changed names of global symbols - Fixed a bug when chmoding or chowning root directory -1.98 Fixed a deadlock when using old_readdir - Better directory handling; workaround for "unbalanced tree" bug in OS/2 -1.99 Corrected a possible problem when there's not enough space while deleting - file - Now it tries to truncate the file if there's not enough space when deleting - Removed a lot of redundant code -2.00 Fixed a bug in rename (it was there since 1.96) - Better anti-fragmentation strategy -2.01 Fixed problem with directory listing over NFS - Directory lseek now checks for proper parameters - Fixed race-condition in buffer code - it is in all filesystems in Linux; - when reading device (cat /dev/hda) while creating files on it, files - could be damaged -2.02 Workaround for bug in breada in Linux. breada could cause accesses beyond - end of partition -2.03 Char, block devices and pipes are correctly created - Fixed non-crashing race in unlink (Alexander Viro) - Now it works with Japanese version of OS/2 -2.04 Fixed error when ftruncate used to extend file -2.05 Fixed crash when got mount parameters without = - Fixed crash when allocation of anode failed due to full disk - Fixed some crashes when block io or inode allocation failed -2.06 Fixed some crash on corrupted disk structures - Better allocation strategy - Reschedule points added so that it doesn't lock CPU long time - It should work in read-only mode on Warp Server -2.07 More fixes for Warp Server. Now it really works -2.08 Creating new files is not so slow on large disks - An attempt to sync deleted file does not generate filesystem error -2.09 Fixed error on extremely fragmented files - - - vim: set textwidth=80: diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index f776411340cb..3fbe2fa0b5c5 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -69,6 +69,7 @@ Documentation for filesystem implementations. gfs2-uevents hfs hfsplus + hpfs fuse overlayfs virtiofs -- cgit v1.2.3 From de389cf08d4708d0a03516e5ce0e193f49f0b358 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:10 +0100 Subject: docs: filesystems: convert inotify.txt to ReST - Add a SPDX header; - Add a document title; - Adjust document title; - Fix list markups; - Some whitespace fixes and new line breaks; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: Jan Kara Link: https://lore.kernel.org/r/8f846843ecf1914988feb4d001e3a53d27dc1a65.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/inotify.rst | 90 +++++++++++++++++++++++++++++++++++ Documentation/filesystems/inotify.txt | 79 ------------------------------ 3 files changed, 91 insertions(+), 79 deletions(-) create mode 100644 Documentation/filesystems/inotify.rst delete mode 100644 Documentation/filesystems/inotify.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 3fbe2fa0b5c5..5a737722652c 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -70,6 +70,7 @@ Documentation for filesystem implementations. hfs hfsplus hpfs + inotify fuse overlayfs virtiofs diff --git a/Documentation/filesystems/inotify.rst b/Documentation/filesystems/inotify.rst new file mode 100644 index 000000000000..7f7ef8af0e1e --- /dev/null +++ b/Documentation/filesystems/inotify.rst @@ -0,0 +1,90 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================================================== +Inotify - A Powerful yet Simple File Change Notification System +=============================================================== + + + +Document started 15 Mar 2005 by Robert Love + +Document updated 4 Jan 2015 by Zhang Zhen + + - Deleted obsoleted interface, just refer to manpages for user interface. + +(i) Rationale + +Q: + What is the design decision behind not tying the watch to the open fd of + the watched object? + +A: + Watches are associated with an open inotify device, not an open file. + This solves the primary problem with dnotify: keeping the file open pins + the file and thus, worse, pins the mount. Dnotify is therefore infeasible + for use on a desktop system with removable media as the media cannot be + unmounted. Watching a file should not require that it be open. + +Q: + What is the design decision behind using an-fd-per-instance as opposed to + an fd-per-watch? + +A: + An fd-per-watch quickly consumes more file descriptors than are allowed, + more fd's than are feasible to manage, and more fd's than are optimally + select()-able. Yes, root can bump the per-process fd limit and yes, users + can use epoll, but requiring both is a silly and extraneous requirement. + A watch consumes less memory than an open file, separating the number + spaces is thus sensible. The current design is what user-space developers + want: Users initialize inotify, once, and add n watches, requiring but one + fd and no twiddling with fd limits. Initializing an inotify instance two + thousand times is silly. If we can implement user-space's preferences + cleanly--and we can, the idr layer makes stuff like this trivial--then we + should. + + There are other good arguments. With a single fd, there is a single + item to block on, which is mapped to a single queue of events. The single + fd returns all watch events and also any potential out-of-band data. If + every fd was a separate watch, + + - There would be no way to get event ordering. Events on file foo and + file bar would pop poll() on both fd's, but there would be no way to tell + which happened first. A single queue trivially gives you ordering. Such + ordering is crucial to existing applications such as Beagle. Imagine + "mv a b ; mv b a" events without ordering. + + - We'd have to maintain n fd's and n internal queues with state, + versus just one. It is a lot messier in the kernel. A single, linear + queue is the data structure that makes sense. + + - User-space developers prefer the current API. The Beagle guys, for + example, love it. Trust me, I asked. It is not a surprise: Who'd want + to manage and block on 1000 fd's via select? + + - No way to get out of band data. + + - 1024 is still too low. ;-) + + When you talk about designing a file change notification system that + scales to 1000s of directories, juggling 1000s of fd's just does not seem + the right interface. It is too heavy. + + Additionally, it _is_ possible to more than one instance and + juggle more than one queue and thus more than one associated fd. There + need not be a one-fd-per-process mapping; it is one-fd-per-queue and a + process can easily want more than one queue. + +Q: + Why the system call approach? + +A: + The poor user-space interface is the second biggest problem with dnotify. + Signals are a terrible, terrible interface for file notification. Or for + anything, for that matter. The ideal solution, from all perspectives, is a + file descriptor-based one that allows basic file I/O and poll/select. + Obtaining the fd and managing the watches could have been done either via a + device file or a family of new system calls. We decided to implement a + family of system calls because that is the preferred approach for new kernel + interfaces. The only real difference was whether we wanted to use open(2) + and ioctl(2) or a couple of new system calls. System calls beat ioctls. + diff --git a/Documentation/filesystems/inotify.txt b/Documentation/filesystems/inotify.txt deleted file mode 100644 index 51f61db787fb..000000000000 --- a/Documentation/filesystems/inotify.txt +++ /dev/null @@ -1,79 +0,0 @@ - inotify - a powerful yet simple file change notification system - - - -Document started 15 Mar 2005 by Robert Love -Document updated 4 Jan 2015 by Zhang Zhen - --Deleted obsoleted interface, just refer to manpages for user interface. - -(i) Rationale - -Q: What is the design decision behind not tying the watch to the open fd of - the watched object? - -A: Watches are associated with an open inotify device, not an open file. - This solves the primary problem with dnotify: keeping the file open pins - the file and thus, worse, pins the mount. Dnotify is therefore infeasible - for use on a desktop system with removable media as the media cannot be - unmounted. Watching a file should not require that it be open. - -Q: What is the design decision behind using an-fd-per-instance as opposed to - an fd-per-watch? - -A: An fd-per-watch quickly consumes more file descriptors than are allowed, - more fd's than are feasible to manage, and more fd's than are optimally - select()-able. Yes, root can bump the per-process fd limit and yes, users - can use epoll, but requiring both is a silly and extraneous requirement. - A watch consumes less memory than an open file, separating the number - spaces is thus sensible. The current design is what user-space developers - want: Users initialize inotify, once, and add n watches, requiring but one - fd and no twiddling with fd limits. Initializing an inotify instance two - thousand times is silly. If we can implement user-space's preferences - cleanly--and we can, the idr layer makes stuff like this trivial--then we - should. - - There are other good arguments. With a single fd, there is a single - item to block on, which is mapped to a single queue of events. The single - fd returns all watch events and also any potential out-of-band data. If - every fd was a separate watch, - - - There would be no way to get event ordering. Events on file foo and - file bar would pop poll() on both fd's, but there would be no way to tell - which happened first. A single queue trivially gives you ordering. Such - ordering is crucial to existing applications such as Beagle. Imagine - "mv a b ; mv b a" events without ordering. - - - We'd have to maintain n fd's and n internal queues with state, - versus just one. It is a lot messier in the kernel. A single, linear - queue is the data structure that makes sense. - - - User-space developers prefer the current API. The Beagle guys, for - example, love it. Trust me, I asked. It is not a surprise: Who'd want - to manage and block on 1000 fd's via select? - - - No way to get out of band data. - - - 1024 is still too low. ;-) - - When you talk about designing a file change notification system that - scales to 1000s of directories, juggling 1000s of fd's just does not seem - the right interface. It is too heavy. - - Additionally, it _is_ possible to more than one instance and - juggle more than one queue and thus more than one associated fd. There - need not be a one-fd-per-process mapping; it is one-fd-per-queue and a - process can easily want more than one queue. - -Q: Why the system call approach? - -A: The poor user-space interface is the second biggest problem with dnotify. - Signals are a terrible, terrible interface for file notification. Or for - anything, for that matter. The ideal solution, from all perspectives, is a - file descriptor-based one that allows basic file I/O and poll/select. - Obtaining the fd and managing the watches could have been done either via a - device file or a family of new system calls. We decided to implement a - family of system calls because that is the preferred approach for new kernel - interfaces. The only real difference was whether we wanted to use open(2) - and ioctl(2) or a couple of new system calls. System calls beat ioctls. - -- cgit v1.2.3 From 76f216855b6bd1027e236b29cd7fece7336c37eb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:11 +0100 Subject: docs: filesystems: convert isofs.txt to ReST - Add a SPDX header; - Add a document title; - Some whitespace fixes and new line breaks; - Add table markups; - Add lists markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/ec16dc09d0c23bb0c1af3d3f33a96896083a1d36.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/isofs.rst | 64 +++++++++++++++++++++++++++++++++++++ Documentation/filesystems/isofs.txt | 48 ---------------------------- 3 files changed, 65 insertions(+), 48 deletions(-) create mode 100644 Documentation/filesystems/isofs.rst delete mode 100644 Documentation/filesystems/isofs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 5a737722652c..8c8813ada53f 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -71,6 +71,7 @@ Documentation for filesystem implementations. hfsplus hpfs inotify + isofs fuse overlayfs virtiofs diff --git a/Documentation/filesystems/isofs.rst b/Documentation/filesystems/isofs.rst new file mode 100644 index 000000000000..08fd469091d4 --- /dev/null +++ b/Documentation/filesystems/isofs.rst @@ -0,0 +1,64 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================== +ISO9660 Filesystem +================== + +Mount options that are the same as for msdos and vfat partitions. + + ========= ======================================================== + gid=nnn All files in the partition will be in group nnn. + uid=nnn All files in the partition will be owned by user id nnn. + umask=nnn The permission mask (see umask(1)) for the partition. + ========= ======================================================== + +Mount options that are the same as vfat partitions. These are only useful +when using discs encoded using Microsoft's Joliet extensions. + + ============== ============================================================= + iocharset=name Character set to use for converting from Unicode to + ASCII. Joliet filenames are stored in Unicode format, but + Unix for the most part doesn't know how to deal with Unicode. + There is also an option of doing UTF-8 translations with the + utf8 option. + utf8 Encode Unicode names in UTF-8 format. Default is no. + ============== ============================================================= + +Mount options unique to the isofs filesystem. + + ================= ============================================================ + block=512 Set the block size for the disk to 512 bytes + block=1024 Set the block size for the disk to 1024 bytes + block=2048 Set the block size for the disk to 2048 bytes + check=relaxed Matches filenames with different cases + check=strict Matches only filenames with the exact same case + cruft Try to handle badly formatted CDs. + map=off Do not map non-Rock Ridge filenames to lower case + map=normal Map non-Rock Ridge filenames to lower case + map=acorn As map=normal but also apply Acorn extensions if present + mode=xxx Sets the permissions on files to xxx unless Rock Ridge + extensions set the permissions otherwise + dmode=xxx Sets the permissions on directories to xxx unless Rock Ridge + extensions set the permissions otherwise + overriderockperm Set permissions on files and directories according to + 'mode' and 'dmode' even though Rock Ridge extensions are + present. + nojoliet Ignore Joliet extensions if they are present. + norock Ignore Rock Ridge extensions if they are present. + hide Completely strip hidden files from the file system. + showassoc Show files marked with the 'associated' bit + unhide Deprecated; showing hidden files is now default; + If given, it is a synonym for 'showassoc' which will + recreate previous unhide behavior + session=x Select number of session on multisession CD + sbsector=xxx Session begins from sector xxx + ================= ============================================================ + +Recommended documents about ISO 9660 standard are located at: + +- http://www.y-adagio.com/ +- ftp://ftp.ecma.ch/ecma-st/Ecma-119.pdf + +Quoting from the PDF "This 2nd Edition of Standard ECMA-119 is technically +identical with ISO 9660.", so it is a valid and gratis substitute of the +official ISO specification. diff --git a/Documentation/filesystems/isofs.txt b/Documentation/filesystems/isofs.txt deleted file mode 100644 index ba0a93384de0..000000000000 --- a/Documentation/filesystems/isofs.txt +++ /dev/null @@ -1,48 +0,0 @@ -Mount options that are the same as for msdos and vfat partitions. - - gid=nnn All files in the partition will be in group nnn. - uid=nnn All files in the partition will be owned by user id nnn. - umask=nnn The permission mask (see umask(1)) for the partition. - -Mount options that are the same as vfat partitions. These are only useful -when using discs encoded using Microsoft's Joliet extensions. - iocharset=name Character set to use for converting from Unicode to - ASCII. Joliet filenames are stored in Unicode format, but - Unix for the most part doesn't know how to deal with Unicode. - There is also an option of doing UTF-8 translations with the - utf8 option. - utf8 Encode Unicode names in UTF-8 format. Default is no. - -Mount options unique to the isofs filesystem. - block=512 Set the block size for the disk to 512 bytes - block=1024 Set the block size for the disk to 1024 bytes - block=2048 Set the block size for the disk to 2048 bytes - check=relaxed Matches filenames with different cases - check=strict Matches only filenames with the exact same case - cruft Try to handle badly formatted CDs. - map=off Do not map non-Rock Ridge filenames to lower case - map=normal Map non-Rock Ridge filenames to lower case - map=acorn As map=normal but also apply Acorn extensions if present - mode=xxx Sets the permissions on files to xxx unless Rock Ridge - extensions set the permissions otherwise - dmode=xxx Sets the permissions on directories to xxx unless Rock Ridge - extensions set the permissions otherwise - overriderockperm Set permissions on files and directories according to - 'mode' and 'dmode' even though Rock Ridge extensions are - present. - nojoliet Ignore Joliet extensions if they are present. - norock Ignore Rock Ridge extensions if they are present. - hide Completely strip hidden files from the file system. - showassoc Show files marked with the 'associated' bit - unhide Deprecated; showing hidden files is now default; - If given, it is a synonym for 'showassoc' which will - recreate previous unhide behavior - session=x Select number of session on multisession CD - sbsector=xxx Session begins from sector xxx - -Recommended documents about ISO 9660 standard are located at: -http://www.y-adagio.com/ -ftp://ftp.ecma.ch/ecma-st/Ecma-119.pdf -Quoting from the PDF "This 2nd Edition of Standard ECMA-119 is technically -identical with ISO 9660.", so it is a valid and gratis substitute of the -official ISO specification. -- cgit v1.2.3 From 2640c19dcab0f6530007dfb4ee5870f5d61b0772 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:12 +0100 Subject: docs: filesystems: convert nilfs2.txt to ReST - Add a SPDX header; - Add a document title; - Adjust document title; - Mark literal blocks as such; - use :field: markup; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/f7989ca501585f5990fffd2d365cfca4fe9fdd6f.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 3 +- Documentation/filesystems/nilfs2.rst | 286 +++++++++++++++++++++++++++++++++++ Documentation/filesystems/nilfs2.txt | 276 --------------------------------- 3 files changed, 288 insertions(+), 277 deletions(-) create mode 100644 Documentation/filesystems/nilfs2.rst delete mode 100644 Documentation/filesystems/nilfs2.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 8c8813ada53f..01587704fcc9 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -70,9 +70,10 @@ Documentation for filesystem implementations. hfs hfsplus hpfs + fuse inotify isofs - fuse + nilfs2 overlayfs virtiofs vfat diff --git a/Documentation/filesystems/nilfs2.rst b/Documentation/filesystems/nilfs2.rst new file mode 100644 index 000000000000..6c49f04e9e0a --- /dev/null +++ b/Documentation/filesystems/nilfs2.rst @@ -0,0 +1,286 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====== +NILFS2 +====== + +NILFS2 is a log-structured file system (LFS) supporting continuous +snapshotting. In addition to versioning capability of the entire file +system, users can even restore files mistakenly overwritten or +destroyed just a few seconds ago. Since NILFS2 can keep consistency +like conventional LFS, it achieves quick recovery after system +crashes. + +NILFS2 creates a number of checkpoints every few seconds or per +synchronous write basis (unless there is no change). Users can select +significant versions among continuously created checkpoints, and can +change them into snapshots which will be preserved until they are +changed back to checkpoints. + +There is no limit on the number of snapshots until the volume gets +full. Each snapshot is mountable as a read-only file system +concurrently with its writable mount, and this feature is convenient +for online backup. + +The userland tools are included in nilfs-utils package, which is +available from the following download page. At least "mkfs.nilfs2", +"mount.nilfs2", "umount.nilfs2", and "nilfs_cleanerd" (so called +cleaner or garbage collector) are required. Details on the tools are +described in the man pages included in the package. + +:Project web page: https://nilfs.sourceforge.io/ +:Download page: https://nilfs.sourceforge.io/en/download.html +:List info: http://vger.kernel.org/vger-lists.html#linux-nilfs + +Caveats +======= + +Features which NILFS2 does not support yet: + + - atime + - extended attributes + - POSIX ACLs + - quotas + - fsck + - defragmentation + +Mount options +============= + +NILFS2 supports the following mount options: +(*) == default + +======================= ======================================================= +barrier(*) This enables/disables the use of write barriers. This +nobarrier requires an IO stack which can support barriers, and + if nilfs gets an error on a barrier write, it will + disable again with a warning. +errors=continue Keep going on a filesystem error. +errors=remount-ro(*) Remount the filesystem read-only on an error. +errors=panic Panic and halt the machine if an error occurs. +cp=n Specify the checkpoint-number of the snapshot to be + mounted. Checkpoints and snapshots are listed by lscp + user command. Only the checkpoints marked as snapshot + are mountable with this option. Snapshot is read-only, + so a read-only mount option must be specified together. +order=relaxed(*) Apply relaxed order semantics that allows modified data + blocks to be written to disk without making a + checkpoint if no metadata update is going. This mode + is equivalent to the ordered data mode of the ext3 + filesystem except for the updates on data blocks still + conserve atomicity. This will improve synchronous + write performance for overwriting. +order=strict Apply strict in-order semantics that preserves sequence + of all file operations including overwriting of data + blocks. That means, it is guaranteed that no + overtaking of events occurs in the recovered file + system after a crash. +norecovery Disable recovery of the filesystem on mount. + This disables every write access on the device for + read-only mounts or snapshots. This option will fail + for r/w mounts on an unclean volume. +discard This enables/disables the use of discard/TRIM commands. +nodiscard(*) The discard/TRIM commands are sent to the underlying + block device when blocks are freed. This is useful + for SSD devices and sparse/thinly-provisioned LUNs. +======================= ======================================================= + +Ioctls +====== + +There is some NILFS2 specific functionality which can be accessed by applications +through the system call interfaces. The list of all NILFS2 specific ioctls are +shown in the table below. + +Table of NILFS2 specific ioctls: + + ============================== =============================================== + Ioctl Description + ============================== =============================================== + NILFS_IOCTL_CHANGE_CPMODE Change mode of given checkpoint between + checkpoint and snapshot state. This ioctl is + used in chcp and mkcp utilities. + + NILFS_IOCTL_DELETE_CHECKPOINT Remove checkpoint from NILFS2 file system. + This ioctl is used in rmcp utility. + + NILFS_IOCTL_GET_CPINFO Return info about requested checkpoints. This + ioctl is used in lscp utility and by + nilfs_cleanerd daemon. + + NILFS_IOCTL_GET_CPSTAT Return checkpoints statistics. This ioctl is + used by lscp, rmcp utilities and by + nilfs_cleanerd daemon. + + NILFS_IOCTL_GET_SUINFO Return segment usage info about requested + segments. This ioctl is used in lssu, + nilfs_resize utilities and by nilfs_cleanerd + daemon. + + NILFS_IOCTL_SET_SUINFO Modify segment usage info of requested + segments. This ioctl is used by + nilfs_cleanerd daemon to skip unnecessary + cleaning operation of segments and reduce + performance penalty or wear of flash device + due to redundant move of in-use blocks. + + NILFS_IOCTL_GET_SUSTAT Return segment usage statistics. This ioctl + is used in lssu, nilfs_resize utilities and + by nilfs_cleanerd daemon. + + NILFS_IOCTL_GET_VINFO Return information on virtual block addresses. + This ioctl is used by nilfs_cleanerd daemon. + + NILFS_IOCTL_GET_BDESCS Return information about descriptors of disk + block numbers. This ioctl is used by + nilfs_cleanerd daemon. + + NILFS_IOCTL_CLEAN_SEGMENTS Do garbage collection operation in the + environment of requested parameters from + userspace. This ioctl is used by + nilfs_cleanerd daemon. + + NILFS_IOCTL_SYNC Make a checkpoint. This ioctl is used in + mkcp utility. + + NILFS_IOCTL_RESIZE Resize NILFS2 volume. This ioctl is used + by nilfs_resize utility. + + NILFS_IOCTL_SET_ALLOC_RANGE Define lower limit of segments in bytes and + upper limit of segments in bytes. This ioctl + is used by nilfs_resize utility. + ============================== =============================================== + +NILFS2 usage +============ + +To use nilfs2 as a local file system, simply:: + + # mkfs -t nilfs2 /dev/block_device + # mount -t nilfs2 /dev/block_device /dir + +This will also invoke the cleaner through the mount helper program +(mount.nilfs2). + +Checkpoints and snapshots are managed by the following commands. +Their manpages are included in the nilfs-utils package above. + + ==== =========================================================== + lscp list checkpoints or snapshots. + mkcp make a checkpoint or a snapshot. + chcp change an existing checkpoint to a snapshot or vice versa. + rmcp invalidate specified checkpoint(s). + ==== =========================================================== + +To mount a snapshot:: + + # mount -t nilfs2 -r -o cp= /dev/block_device /snap_dir + +where is the checkpoint number of the snapshot. + +To unmount the NILFS2 mount point or snapshot, simply:: + + # umount /dir + +Then, the cleaner daemon is automatically shut down by the umount +helper program (umount.nilfs2). + +Disk format +=========== + +A nilfs2 volume is equally divided into a number of segments except +for the super block (SB) and segment #0. A segment is the container +of logs. Each log is composed of summary information blocks, payload +blocks, and an optional super root block (SR):: + + ______________________________________________________ + | |SB| | Segment | Segment | Segment | ... | Segment | | + |_|__|_|____0____|____1____|____2____|_____|____N____|_| + 0 +1K +4K +8M +16M +24M +(8MB x N) + . . (Typical offsets for 4KB-block) + . . + .______________________. + | log | log |... | log | + |__1__|__2__|____|__m__| + . . + . . + . . + .______________________________. + | Summary | Payload blocks |SR| + |_blocks__|_________________|__| + +The payload blocks are organized per file, and each file consists of +data blocks and B-tree node blocks:: + + |<--- File-A --->|<--- File-B --->| + _______________________________________________________________ + | Data blocks | B-tree blocks | Data blocks | B-tree blocks | ... + _|_____________|_______________|_____________|_______________|_ + + +Since only the modified blocks are written in the log, it may have +files without data blocks or B-tree node blocks. + +The organization of the blocks is recorded in the summary information +blocks, which contains a header structure (nilfs_segment_summary), per +file structures (nilfs_finfo), and per block structures (nilfs_binfo):: + + _________________________________________________________________________ + | Summary | finfo | binfo | ... | binfo | finfo | binfo | ... | binfo |... + |_blocks__|___A___|_(A,1)_|_____|(A,Na)_|___B___|_(B,1)_|_____|(B,Nb)_|___ + + +The logs include regular files, directory files, symbolic link files +and several meta data files. The mata data files are the files used +to maintain file system meta data. The current version of NILFS2 uses +the following meta data files:: + + 1) Inode file (ifile) -- Stores on-disk inodes + 2) Checkpoint file (cpfile) -- Stores checkpoints + 3) Segment usage file (sufile) -- Stores allocation state of segments + 4) Data address translation file -- Maps virtual block numbers to usual + (DAT) block numbers. This file serves to + make on-disk blocks relocatable. + +The following figure shows a typical organization of the logs:: + + _________________________________________________________________________ + | Summary | regular file | file | ... | ifile | cpfile | sufile | DAT |SR| + |_blocks__|_or_directory_|_______|_____|_______|________|________|_____|__| + + +To stride over segment boundaries, this sequence of files may be split +into multiple logs. The sequence of logs that should be treated as +logically one log, is delimited with flags marked in the segment +summary. The recovery code of nilfs2 looks this boundary information +to ensure atomicity of updates. + +The super root block is inserted for every checkpoints. It includes +three special inodes, inodes for the DAT, cpfile, and sufile. Inodes +of regular files, directories, symlinks and other special files, are +included in the ifile. The inode of ifile itself is included in the +corresponding checkpoint entry in the cpfile. Thus, the hierarchy +among NILFS2 files can be depicted as follows:: + + Super block (SB) + | + v + Super root block (the latest cno=xx) + |-- DAT + |-- sufile + `-- cpfile + |-- ifile (cno=c1) + |-- ifile (cno=c2) ---- file (ino=i1) + : : |-- file (ino=i2) + `-- ifile (cno=xx) |-- file (ino=i3) + : : + `-- file (ino=yy) + ( regular file, directory, or symlink ) + +For detail on the format of each file, please see nilfs2_ondisk.h +located at include/uapi/linux directory. + +There are no patents or other intellectual property that we protect +with regard to the design of NILFS2. It is allowed to replicate the +design in hopes that other operating systems could share (mount, read, +write, etc.) data stored in this format. diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt deleted file mode 100644 index f2f3f8592a6f..000000000000 --- a/Documentation/filesystems/nilfs2.txt +++ /dev/null @@ -1,276 +0,0 @@ -NILFS2 ------- - -NILFS2 is a log-structured file system (LFS) supporting continuous -snapshotting. In addition to versioning capability of the entire file -system, users can even restore files mistakenly overwritten or -destroyed just a few seconds ago. Since NILFS2 can keep consistency -like conventional LFS, it achieves quick recovery after system -crashes. - -NILFS2 creates a number of checkpoints every few seconds or per -synchronous write basis (unless there is no change). Users can select -significant versions among continuously created checkpoints, and can -change them into snapshots which will be preserved until they are -changed back to checkpoints. - -There is no limit on the number of snapshots until the volume gets -full. Each snapshot is mountable as a read-only file system -concurrently with its writable mount, and this feature is convenient -for online backup. - -The userland tools are included in nilfs-utils package, which is -available from the following download page. At least "mkfs.nilfs2", -"mount.nilfs2", "umount.nilfs2", and "nilfs_cleanerd" (so called -cleaner or garbage collector) are required. Details on the tools are -described in the man pages included in the package. - -Project web page: https://nilfs.sourceforge.io/ -Download page: https://nilfs.sourceforge.io/en/download.html -List info: http://vger.kernel.org/vger-lists.html#linux-nilfs - -Caveats -======= - -Features which NILFS2 does not support yet: - - - atime - - extended attributes - - POSIX ACLs - - quotas - - fsck - - defragmentation - -Mount options -============= - -NILFS2 supports the following mount options: -(*) == default - -barrier(*) This enables/disables the use of write barriers. This -nobarrier requires an IO stack which can support barriers, and - if nilfs gets an error on a barrier write, it will - disable again with a warning. -errors=continue Keep going on a filesystem error. -errors=remount-ro(*) Remount the filesystem read-only on an error. -errors=panic Panic and halt the machine if an error occurs. -cp=n Specify the checkpoint-number of the snapshot to be - mounted. Checkpoints and snapshots are listed by lscp - user command. Only the checkpoints marked as snapshot - are mountable with this option. Snapshot is read-only, - so a read-only mount option must be specified together. -order=relaxed(*) Apply relaxed order semantics that allows modified data - blocks to be written to disk without making a - checkpoint if no metadata update is going. This mode - is equivalent to the ordered data mode of the ext3 - filesystem except for the updates on data blocks still - conserve atomicity. This will improve synchronous - write performance for overwriting. -order=strict Apply strict in-order semantics that preserves sequence - of all file operations including overwriting of data - blocks. That means, it is guaranteed that no - overtaking of events occurs in the recovered file - system after a crash. -norecovery Disable recovery of the filesystem on mount. - This disables every write access on the device for - read-only mounts or snapshots. This option will fail - for r/w mounts on an unclean volume. -discard This enables/disables the use of discard/TRIM commands. -nodiscard(*) The discard/TRIM commands are sent to the underlying - block device when blocks are freed. This is useful - for SSD devices and sparse/thinly-provisioned LUNs. - -Ioctls -====== - -There is some NILFS2 specific functionality which can be accessed by applications -through the system call interfaces. The list of all NILFS2 specific ioctls are -shown in the table below. - -Table of NILFS2 specific ioctls -.............................................................................. - Ioctl Description - NILFS_IOCTL_CHANGE_CPMODE Change mode of given checkpoint between - checkpoint and snapshot state. This ioctl is - used in chcp and mkcp utilities. - - NILFS_IOCTL_DELETE_CHECKPOINT Remove checkpoint from NILFS2 file system. - This ioctl is used in rmcp utility. - - NILFS_IOCTL_GET_CPINFO Return info about requested checkpoints. This - ioctl is used in lscp utility and by - nilfs_cleanerd daemon. - - NILFS_IOCTL_GET_CPSTAT Return checkpoints statistics. This ioctl is - used by lscp, rmcp utilities and by - nilfs_cleanerd daemon. - - NILFS_IOCTL_GET_SUINFO Return segment usage info about requested - segments. This ioctl is used in lssu, - nilfs_resize utilities and by nilfs_cleanerd - daemon. - - NILFS_IOCTL_SET_SUINFO Modify segment usage info of requested - segments. This ioctl is used by - nilfs_cleanerd daemon to skip unnecessary - cleaning operation of segments and reduce - performance penalty or wear of flash device - due to redundant move of in-use blocks. - - NILFS_IOCTL_GET_SUSTAT Return segment usage statistics. This ioctl - is used in lssu, nilfs_resize utilities and - by nilfs_cleanerd daemon. - - NILFS_IOCTL_GET_VINFO Return information on virtual block addresses. - This ioctl is used by nilfs_cleanerd daemon. - - NILFS_IOCTL_GET_BDESCS Return information about descriptors of disk - block numbers. This ioctl is used by - nilfs_cleanerd daemon. - - NILFS_IOCTL_CLEAN_SEGMENTS Do garbage collection operation in the - environment of requested parameters from - userspace. This ioctl is used by - nilfs_cleanerd daemon. - - NILFS_IOCTL_SYNC Make a checkpoint. This ioctl is used in - mkcp utility. - - NILFS_IOCTL_RESIZE Resize NILFS2 volume. This ioctl is used - by nilfs_resize utility. - - NILFS_IOCTL_SET_ALLOC_RANGE Define lower limit of segments in bytes and - upper limit of segments in bytes. This ioctl - is used by nilfs_resize utility. - -NILFS2 usage -============ - -To use nilfs2 as a local file system, simply: - - # mkfs -t nilfs2 /dev/block_device - # mount -t nilfs2 /dev/block_device /dir - -This will also invoke the cleaner through the mount helper program -(mount.nilfs2). - -Checkpoints and snapshots are managed by the following commands. -Their manpages are included in the nilfs-utils package above. - - lscp list checkpoints or snapshots. - mkcp make a checkpoint or a snapshot. - chcp change an existing checkpoint to a snapshot or vice versa. - rmcp invalidate specified checkpoint(s). - -To mount a snapshot, - - # mount -t nilfs2 -r -o cp= /dev/block_device /snap_dir - -where is the checkpoint number of the snapshot. - -To unmount the NILFS2 mount point or snapshot, simply: - - # umount /dir - -Then, the cleaner daemon is automatically shut down by the umount -helper program (umount.nilfs2). - -Disk format -=========== - -A nilfs2 volume is equally divided into a number of segments except -for the super block (SB) and segment #0. A segment is the container -of logs. Each log is composed of summary information blocks, payload -blocks, and an optional super root block (SR): - - ______________________________________________________ - | |SB| | Segment | Segment | Segment | ... | Segment | | - |_|__|_|____0____|____1____|____2____|_____|____N____|_| - 0 +1K +4K +8M +16M +24M +(8MB x N) - . . (Typical offsets for 4KB-block) - . . - .______________________. - | log | log |... | log | - |__1__|__2__|____|__m__| - . . - . . - . . - .______________________________. - | Summary | Payload blocks |SR| - |_blocks__|_________________|__| - -The payload blocks are organized per file, and each file consists of -data blocks and B-tree node blocks: - - |<--- File-A --->|<--- File-B --->| - _______________________________________________________________ - | Data blocks | B-tree blocks | Data blocks | B-tree blocks | ... - _|_____________|_______________|_____________|_______________|_ - - -Since only the modified blocks are written in the log, it may have -files without data blocks or B-tree node blocks. - -The organization of the blocks is recorded in the summary information -blocks, which contains a header structure (nilfs_segment_summary), per -file structures (nilfs_finfo), and per block structures (nilfs_binfo): - - _________________________________________________________________________ - | Summary | finfo | binfo | ... | binfo | finfo | binfo | ... | binfo |... - |_blocks__|___A___|_(A,1)_|_____|(A,Na)_|___B___|_(B,1)_|_____|(B,Nb)_|___ - - -The logs include regular files, directory files, symbolic link files -and several meta data files. The mata data files are the files used -to maintain file system meta data. The current version of NILFS2 uses -the following meta data files: - - 1) Inode file (ifile) -- Stores on-disk inodes - 2) Checkpoint file (cpfile) -- Stores checkpoints - 3) Segment usage file (sufile) -- Stores allocation state of segments - 4) Data address translation file -- Maps virtual block numbers to usual - (DAT) block numbers. This file serves to - make on-disk blocks relocatable. - -The following figure shows a typical organization of the logs: - - _________________________________________________________________________ - | Summary | regular file | file | ... | ifile | cpfile | sufile | DAT |SR| - |_blocks__|_or_directory_|_______|_____|_______|________|________|_____|__| - - -To stride over segment boundaries, this sequence of files may be split -into multiple logs. The sequence of logs that should be treated as -logically one log, is delimited with flags marked in the segment -summary. The recovery code of nilfs2 looks this boundary information -to ensure atomicity of updates. - -The super root block is inserted for every checkpoints. It includes -three special inodes, inodes for the DAT, cpfile, and sufile. Inodes -of regular files, directories, symlinks and other special files, are -included in the ifile. The inode of ifile itself is included in the -corresponding checkpoint entry in the cpfile. Thus, the hierarchy -among NILFS2 files can be depicted as follows: - - Super block (SB) - | - v - Super root block (the latest cno=xx) - |-- DAT - |-- sufile - `-- cpfile - |-- ifile (cno=c1) - |-- ifile (cno=c2) ---- file (ino=i1) - : : |-- file (ino=i2) - `-- ifile (cno=xx) |-- file (ino=i3) - : : - `-- file (ino=yy) - ( regular file, directory, or symlink ) - -For detail on the format of each file, please see nilfs2_ondisk.h -located at include/uapi/linux directory. - -There are no patents or other intellectual property that we protect -with regard to the design of NILFS2. It is allowed to replicate the -design in hopes that other operating systems could share (mount, read, -write, etc.) data stored in this format. -- cgit v1.2.3 From 461f2c8f13fcc0d349e4acac46aacf63dbeb34ca Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:13 +0100 Subject: docs: filesystems: convert ntfs.txt to ReST - Add a SPDX header; - Adjust document title; - Comment out text-only ToC; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/f09ca6c9bdd4e7aa7208f3dba0b8753080b38d03.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 3 +- Documentation/filesystems/ntfs.rst | 466 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/ntfs.txt | 451 ---------------------------------- 3 files changed, 468 insertions(+), 452 deletions(-) create mode 100644 Documentation/filesystems/ntfs.rst delete mode 100644 Documentation/filesystems/ntfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 01587704fcc9..62be53c4755d 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -74,7 +74,8 @@ Documentation for filesystem implementations. inotify isofs nilfs2 + nfs/index + ntfs overlayfs virtiofs vfat - nfs/index diff --git a/Documentation/filesystems/ntfs.rst b/Documentation/filesystems/ntfs.rst new file mode 100644 index 000000000000..5bb093a26485 --- /dev/null +++ b/Documentation/filesystems/ntfs.rst @@ -0,0 +1,466 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================ +The Linux NTFS filesystem driver +================================ + + +.. Table of contents + + - Overview + - Web site + - Features + - Supported mount options + - Known bugs and (mis-)features + - Using NTFS volume and stripe sets + - The Device-Mapper driver + - The Software RAID / MD driver + - Limitations when using the MD driver + + +Overview +======== + +Linux-NTFS comes with a number of user-space programs known as ntfsprogs. +These include mkntfs, a full-featured ntfs filesystem format utility, +ntfsundelete used for recovering files that were unintentionally deleted +from an NTFS volume and ntfsresize which is used to resize an NTFS partition. +See the web site for more information. + +To mount an NTFS 1.2/3.x (Windows NT4/2000/XP/2003) volume, use the file +system type 'ntfs'. The driver currently supports read-only mode (with no +fault-tolerance, encryption or journalling) and very limited, but safe, write +support. + +For fault tolerance and raid support (i.e. volume and stripe sets), you can +use the kernel's Software RAID / MD driver. See section "Using Software RAID +with NTFS" for details. + + +Web site +======== + +There is plenty of additional information on the linux-ntfs web site +at http://www.linux-ntfs.org/ + +The web site has a lot of additional information, such as a comprehensive +FAQ, documentation on the NTFS on-disk format, information on the Linux-NTFS +userspace utilities, etc. + + +Features +======== + +- This is a complete rewrite of the NTFS driver that used to be in the 2.4 and + earlier kernels. This new driver implements NTFS read support and is + functionally equivalent to the old ntfs driver and it also implements limited + write support. The biggest limitation at present is that files/directories + cannot be created or deleted. See below for the list of write features that + are so far supported. Another limitation is that writing to compressed files + is not implemented at all. Also, neither read nor write access to encrypted + files is so far implemented. +- The new driver has full support for sparse files on NTFS 3.x volumes which + the old driver isn't happy with. +- The new driver supports execution of binaries due to mmap() now being + supported. +- The new driver supports loopback mounting of files on NTFS which is used by + some Linux distributions to enable the user to run Linux from an NTFS + partition by creating a large file while in Windows and then loopback + mounting the file while in Linux and creating a Linux filesystem on it that + is used to install Linux on it. +- A comparison of the two drivers using:: + + time find . -type f -exec md5sum "{}" \; + + run three times in sequence with each driver (after a reboot) on a 1.4GiB + NTFS partition, showed the new driver to be 20% faster in total time elapsed + (from 9:43 minutes on average down to 7:53). The time spent in user space + was unchanged but the time spent in the kernel was decreased by a factor of + 2.5 (from 85 CPU seconds down to 33). +- The driver does not support short file names in general. For backwards + compatibility, we implement access to files using their short file names if + they exist. The driver will not create short file names however, and a + rename will discard any existing short file name. +- The new driver supports exporting of mounted NTFS volumes via NFS. +- The new driver supports async io (aio). +- The new driver supports fsync(2), fdatasync(2), and msync(2). +- The new driver supports readv(2) and writev(2). +- The new driver supports access time updates (including mtime and ctime). +- The new driver supports truncate(2) and open(2) with O_TRUNC. But at present + only very limited support for highly fragmented files, i.e. ones which have + their data attribute split across multiple extents, is included. Another + limitation is that at present truncate(2) will never create sparse files, + since to mark a file sparse we need to modify the directory entry for the + file and we do not implement directory modifications yet. +- The new driver supports write(2) which can both overwrite existing data and + extend the file size so that you can write beyond the existing data. Also, + writing into sparse regions is supported and the holes are filled in with + clusters. But at present only limited support for highly fragmented files, + i.e. ones which have their data attribute split across multiple extents, is + included. Another limitation is that write(2) will never create sparse + files, since to mark a file sparse we need to modify the directory entry for + the file and we do not implement directory modifications yet. + +Supported mount options +======================= + +In addition to the generic mount options described by the manual page for the +mount command (man 8 mount, also see man 5 fstab), the NTFS driver supports the +following mount options: + +======================= ======================================================= +iocharset=name Deprecated option. Still supported but please use + nls=name in the future. See description for nls=name. + +nls=name Character set to use when returning file names. + Unlike VFAT, NTFS suppresses names that contain + unconvertible characters. Note that most character + sets contain insufficient characters to represent all + possible Unicode characters that can exist on NTFS. + To be sure you are not missing any files, you are + advised to use nls=utf8 which is capable of + representing all Unicode characters. + +utf8= Option no longer supported. Currently mapped to + nls=utf8 but please use nls=utf8 in the future and + make sure utf8 is compiled either as module or into + the kernel. See description for nls=name. + +uid= +gid= +umask= Provide default owner, group, and access mode mask. + These options work as documented in mount(8). By + default, the files/directories are owned by root and + he/she has read and write permissions, as well as + browse permission for directories. No one else has any + access permissions. I.e. the mode on all files is by + default rw------- and for directories rwx------, a + consequence of the default fmask=0177 and dmask=0077. + Using a umask of zero will grant all permissions to + everyone, i.e. all files and directories will have mode + rwxrwxrwx. + +fmask= +dmask= Instead of specifying umask which applies both to + files and directories, fmask applies only to files and + dmask only to directories. + +sloppy= If sloppy is specified, ignore unknown mount options. + Otherwise the default behaviour is to abort mount if + any unknown options are found. + +show_sys_files= If show_sys_files is specified, show the system files + in directory listings. Otherwise the default behaviour + is to hide the system files. + Note that even when show_sys_files is specified, "$MFT" + will not be visible due to bugs/mis-features in glibc. + Further, note that irrespective of show_sys_files, all + files are accessible by name, i.e. you can always do + "ls -l \$UpCase" for example to specifically show the + system file containing the Unicode upcase table. + +case_sensitive= If case_sensitive is specified, treat all file names as + case sensitive and create file names in the POSIX + namespace. Otherwise the default behaviour is to treat + file names as case insensitive and to create file names + in the WIN32/LONG name space. Note, the Linux NTFS + driver will never create short file names and will + remove them on rename/delete of the corresponding long + file name. + Note that files remain accessible via their short file + name, if it exists. If case_sensitive, you will need + to provide the correct case of the short file name. + +disable_sparse= If disable_sparse is specified, creation of sparse + regions, i.e. holes, inside files is disabled for the + volume (for the duration of this mount only). By + default, creation of sparse regions is enabled, which + is consistent with the behaviour of traditional Unix + filesystems. + +errors=opt What to do when critical filesystem errors are found. + Following values can be used for "opt": + + ======== ========================================= + continue DEFAULT, try to clean-up as much as + possible, e.g. marking a corrupt inode as + bad so it is no longer accessed, and then + continue. + recover At present only supported is recovery of + the boot sector from the backup copy. + If read-only mount, the recovery is done + in memory only and not written to disk. + ======== ========================================= + + Note that the options are additive, i.e. specifying:: + + errors=continue,errors=recover + + means the driver will attempt to recover and if that + fails it will clean-up as much as possible and + continue. + +mft_zone_multiplier= Set the MFT zone multiplier for the volume (this + setting is not persistent across mounts and can be + changed from mount to mount but cannot be changed on + remount). Values of 1 to 4 are allowed, 1 being the + default. The MFT zone multiplier determines how much + space is reserved for the MFT on the volume. If all + other space is used up, then the MFT zone will be + shrunk dynamically, so this has no impact on the + amount of free space. However, it can have an impact + on performance by affecting fragmentation of the MFT. + In general use the default. If you have a lot of small + files then use a higher value. The values have the + following meaning: + + ===== ================================= + Value MFT zone size (% of volume size) + ===== ================================= + 1 12.5% + 2 25% + 3 37.5% + 4 50% + ===== ================================= + + Note this option is irrelevant for read-only mounts. +======================= ======================================================= + + +Known bugs and (mis-)features +============================= + +- The link count on each directory inode entry is set to 1, due to Linux not + supporting directory hard links. This may well confuse some user space + applications, since the directory names will have the same inode numbers. + This also speeds up ntfs_read_inode() immensely. And we haven't found any + problems with this approach so far. If you find a problem with this, please + let us know. + + +Please send bug reports/comments/feedback/abuse to the Linux-NTFS development +list at sourceforge: linux-ntfs-dev@lists.sourceforge.net + + +Using NTFS volume and stripe sets +================================= + +For support of volume and stripe sets, you can either use the kernel's +Device-Mapper driver or the kernel's Software RAID / MD driver. The former is +the recommended one to use for linear raid. But the latter is required for +raid level 5. For striping and mirroring, either driver should work fine. + + +The Device-Mapper driver +------------------------ + +You will need to create a table of the components of the volume/stripe set and +how they fit together and load this into the kernel using the dmsetup utility +(see man 8 dmsetup). + +Linear volume sets, i.e. linear raid, has been tested and works fine. Even +though untested, there is no reason why stripe sets, i.e. raid level 0, and +mirrors, i.e. raid level 1 should not work, too. Stripes with parity, i.e. +raid level 5, unfortunately cannot work yet because the current version of the +Device-Mapper driver does not support raid level 5. You may be able to use the +Software RAID / MD driver for raid level 5, see the next section for details. + +To create the table describing your volume you will need to know each of its +components and their sizes in sectors, i.e. multiples of 512-byte blocks. + +For NT4 fault tolerant volumes you can obtain the sizes using fdisk. So for +example if one of your partitions is /dev/hda2 you would do:: + + $ fdisk -ul /dev/hda + + Disk /dev/hda: 81.9 GB, 81964302336 bytes + 255 heads, 63 sectors/track, 9964 cylinders, total 160086528 sectors + Units = sectors of 1 * 512 = 512 bytes + + Device Boot Start End Blocks Id System + /dev/hda1 * 63 4209029 2104483+ 83 Linux + /dev/hda2 4209030 37768814 16779892+ 86 NTFS + /dev/hda3 37768815 46170809 4200997+ 83 Linux + +And you would know that /dev/hda2 has a size of 37768814 - 4209030 + 1 = +33559785 sectors. + +For Win2k and later dynamic disks, you can for example use the ldminfo utility +which is part of the Linux LDM tools (the latest version at the time of +writing is linux-ldm-0.0.8.tar.bz2). You can download it from: + + http://www.linux-ntfs.org/ + +Simply extract the downloaded archive (tar xvjf linux-ldm-0.0.8.tar.bz2), go +into it (cd linux-ldm-0.0.8) and change to the test directory (cd test). You +will find the precompiled (i386) ldminfo utility there. NOTE: You will not be +able to compile this yourself easily so use the binary version! + +Then you would use ldminfo in dump mode to obtain the necessary information:: + + $ ./ldminfo --dump /dev/hda + +This would dump the LDM database found on /dev/hda which describes all of your +dynamic disks and all the volumes on them. At the bottom you will see the +VOLUME DEFINITIONS section which is all you really need. You may need to look +further above to determine which of the disks in the volume definitions is +which device in Linux. Hint: Run ldminfo on each of your dynamic disks and +look at the Disk Id close to the top of the output for each (the PRIVATE HEADER +section). You can then find these Disk Ids in the VBLK DATABASE section in the + components where you will get the LDM Name for the disk that is found in +the VOLUME DEFINITIONS section. + +Note you will also need to enable the LDM driver in the Linux kernel. If your +distribution did not enable it, you will need to recompile the kernel with it +enabled. This will create the LDM partitions on each device at boot time. You +would then use those devices (for /dev/hda they would be /dev/hda1, 2, 3, etc) +in the Device-Mapper table. + +You can also bypass using the LDM driver by using the main device (e.g. +/dev/hda) and then using the offsets of the LDM partitions into this device as +the "Start sector of device" when creating the table. Once again ldminfo would +give you the correct information to do this. + +Assuming you know all your devices and their sizes things are easy. + +For a linear raid the table would look like this (note all values are in +512-byte sectors):: + + # Offset into Size of this Raid type Device Start sector + # volume device of device + 0 1028161 linear /dev/hda1 0 + 1028161 3903762 linear /dev/hdb2 0 + 4931923 2103211 linear /dev/hdc1 0 + +For a striped volume, i.e. raid level 0, you will need to know the chunk size +you used when creating the volume. Windows uses 64kiB as the default, so it +will probably be this unless you changes the defaults when creating the array. + +For a raid level 0 the table would look like this (note all values are in +512-byte sectors):: + + # Offset Size Raid Number Chunk 1st Start 2nd Start + # into of the type of size Device in Device in + # volume volume stripes device device + 0 2056320 striped 2 128 /dev/hda1 0 /dev/hdb1 0 + +If there are more than two devices, just add each of them to the end of the +line. + +Finally, for a mirrored volume, i.e. raid level 1, the table would look like +this (note all values are in 512-byte sectors):: + + # Ofs Size Raid Log Number Region Should Number Source Start Target Start + # in of the type type of log size sync? of Device in Device in + # vol volume params mirrors Device Device + 0 2056320 mirror core 2 16 nosync 2 /dev/hda1 0 /dev/hdb1 0 + +If you are mirroring to multiple devices you can specify further targets at the +end of the line. + +Note the "Should sync?" parameter "nosync" means that the two mirrors are +already in sync which will be the case on a clean shutdown of Windows. If the +mirrors are not clean, you can specify the "sync" option instead of "nosync" +and the Device-Mapper driver will then copy the entirety of the "Source Device" +to the "Target Device" or if you specified multiple target devices to all of +them. + +Once you have your table, save it in a file somewhere (e.g. /etc/ntfsvolume1), +and hand it over to dmsetup to work with, like so:: + + $ dmsetup create myvolume1 /etc/ntfsvolume1 + +You can obviously replace "myvolume1" with whatever name you like. + +If it all worked, you will now have the device /dev/device-mapper/myvolume1 +which you can then just use as an argument to the mount command as usual to +mount the ntfs volume. For example:: + + $ mount -t ntfs -o ro /dev/device-mapper/myvolume1 /mnt/myvol1 + +(You need to create the directory /mnt/myvol1 first and of course you can use +anything you like instead of /mnt/myvol1 as long as it is an existing +directory.) + +It is advisable to do the mount read-only to see if the volume has been setup +correctly to avoid the possibility of causing damage to the data on the ntfs +volume. + + +The Software RAID / MD driver +----------------------------- + +An alternative to using the Device-Mapper driver is to use the kernel's +Software RAID / MD driver. For which you need to set up your /etc/raidtab +appropriately (see man 5 raidtab). + +Linear volume sets, i.e. linear raid, as well as stripe sets, i.e. raid level +0, have been tested and work fine (though see section "Limitations when using +the MD driver with NTFS volumes" especially if you want to use linear raid). +Even though untested, there is no reason why mirrors, i.e. raid level 1, and +stripes with parity, i.e. raid level 5, should not work, too. + +You have to use the "persistent-superblock 0" option for each raid-disk in the +NTFS volume/stripe you are configuring in /etc/raidtab as the persistent +superblock used by the MD driver would damage the NTFS volume. + +Windows by default uses a stripe chunk size of 64k, so you probably want the +"chunk-size 64k" option for each raid-disk, too. + +For example, if you have a stripe set consisting of two partitions /dev/hda5 +and /dev/hdb1 your /etc/raidtab would look like this:: + + raiddev /dev/md0 + raid-level 0 + nr-raid-disks 2 + nr-spare-disks 0 + persistent-superblock 0 + chunk-size 64k + device /dev/hda5 + raid-disk 0 + device /dev/hdb1 + raid-disk 1 + +For linear raid, just change the raid-level above to "raid-level linear", for +mirrors, change it to "raid-level 1", and for stripe sets with parity, change +it to "raid-level 5". + +Note for stripe sets with parity you will also need to tell the MD driver +which parity algorithm to use by specifying the option "parity-algorithm +which", where you need to replace "which" with the name of the algorithm to +use (see man 5 raidtab for available algorithms) and you will have to try the +different available algorithms until you find one that works. Make sure you +are working read-only when playing with this as you may damage your data +otherwise. If you find which algorithm works please let us know (email the +linux-ntfs developers list linux-ntfs-dev@lists.sourceforge.net or drop in on +IRC in channel #ntfs on the irc.freenode.net network) so we can update this +documentation. + +Once the raidtab is setup, run for example raid0run -a to start all devices or +raid0run /dev/md0 to start a particular md device, in this case /dev/md0. + +Then just use the mount command as usual to mount the ntfs volume using for +example:: + + mount -t ntfs -o ro /dev/md0 /mnt/myntfsvolume + +It is advisable to do the mount read-only to see if the md volume has been +setup correctly to avoid the possibility of causing damage to the data on the +ntfs volume. + + +Limitations when using the Software RAID / MD driver +----------------------------------------------------- + +Using the md driver will not work properly if any of your NTFS partitions have +an odd number of sectors. This is especially important for linear raid as all +data after the first partition with an odd number of sectors will be offset by +one or more sectors so if you mount such a partition with write support you +will cause massive damage to the data on the volume which will only become +apparent when you try to use the volume again under Windows. + +So when using linear raid, make sure that all your partitions have an even +number of sectors BEFORE attempting to use it. You have been warned! + +Even better is to simply use the Device-Mapper for linear raid and then you do +not have this problem with odd numbers of sectors. diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt deleted file mode 100644 index 553f10d03076..000000000000 --- a/Documentation/filesystems/ntfs.txt +++ /dev/null @@ -1,451 +0,0 @@ -The Linux NTFS filesystem driver -================================ - - -Table of contents -================= - -- Overview -- Web site -- Features -- Supported mount options -- Known bugs and (mis-)features -- Using NTFS volume and stripe sets - - The Device-Mapper driver - - The Software RAID / MD driver - - Limitations when using the MD driver - - -Overview -======== - -Linux-NTFS comes with a number of user-space programs known as ntfsprogs. -These include mkntfs, a full-featured ntfs filesystem format utility, -ntfsundelete used for recovering files that were unintentionally deleted -from an NTFS volume and ntfsresize which is used to resize an NTFS partition. -See the web site for more information. - -To mount an NTFS 1.2/3.x (Windows NT4/2000/XP/2003) volume, use the file -system type 'ntfs'. The driver currently supports read-only mode (with no -fault-tolerance, encryption or journalling) and very limited, but safe, write -support. - -For fault tolerance and raid support (i.e. volume and stripe sets), you can -use the kernel's Software RAID / MD driver. See section "Using Software RAID -with NTFS" for details. - - -Web site -======== - -There is plenty of additional information on the linux-ntfs web site -at http://www.linux-ntfs.org/ - -The web site has a lot of additional information, such as a comprehensive -FAQ, documentation on the NTFS on-disk format, information on the Linux-NTFS -userspace utilities, etc. - - -Features -======== - -- This is a complete rewrite of the NTFS driver that used to be in the 2.4 and - earlier kernels. This new driver implements NTFS read support and is - functionally equivalent to the old ntfs driver and it also implements limited - write support. The biggest limitation at present is that files/directories - cannot be created or deleted. See below for the list of write features that - are so far supported. Another limitation is that writing to compressed files - is not implemented at all. Also, neither read nor write access to encrypted - files is so far implemented. -- The new driver has full support for sparse files on NTFS 3.x volumes which - the old driver isn't happy with. -- The new driver supports execution of binaries due to mmap() now being - supported. -- The new driver supports loopback mounting of files on NTFS which is used by - some Linux distributions to enable the user to run Linux from an NTFS - partition by creating a large file while in Windows and then loopback - mounting the file while in Linux and creating a Linux filesystem on it that - is used to install Linux on it. -- A comparison of the two drivers using: - time find . -type f -exec md5sum "{}" \; - run three times in sequence with each driver (after a reboot) on a 1.4GiB - NTFS partition, showed the new driver to be 20% faster in total time elapsed - (from 9:43 minutes on average down to 7:53). The time spent in user space - was unchanged but the time spent in the kernel was decreased by a factor of - 2.5 (from 85 CPU seconds down to 33). -- The driver does not support short file names in general. For backwards - compatibility, we implement access to files using their short file names if - they exist. The driver will not create short file names however, and a - rename will discard any existing short file name. -- The new driver supports exporting of mounted NTFS volumes via NFS. -- The new driver supports async io (aio). -- The new driver supports fsync(2), fdatasync(2), and msync(2). -- The new driver supports readv(2) and writev(2). -- The new driver supports access time updates (including mtime and ctime). -- The new driver supports truncate(2) and open(2) with O_TRUNC. But at present - only very limited support for highly fragmented files, i.e. ones which have - their data attribute split across multiple extents, is included. Another - limitation is that at present truncate(2) will never create sparse files, - since to mark a file sparse we need to modify the directory entry for the - file and we do not implement directory modifications yet. -- The new driver supports write(2) which can both overwrite existing data and - extend the file size so that you can write beyond the existing data. Also, - writing into sparse regions is supported and the holes are filled in with - clusters. But at present only limited support for highly fragmented files, - i.e. ones which have their data attribute split across multiple extents, is - included. Another limitation is that write(2) will never create sparse - files, since to mark a file sparse we need to modify the directory entry for - the file and we do not implement directory modifications yet. - -Supported mount options -======================= - -In addition to the generic mount options described by the manual page for the -mount command (man 8 mount, also see man 5 fstab), the NTFS driver supports the -following mount options: - -iocharset=name Deprecated option. Still supported but please use - nls=name in the future. See description for nls=name. - -nls=name Character set to use when returning file names. - Unlike VFAT, NTFS suppresses names that contain - unconvertible characters. Note that most character - sets contain insufficient characters to represent all - possible Unicode characters that can exist on NTFS. - To be sure you are not missing any files, you are - advised to use nls=utf8 which is capable of - representing all Unicode characters. - -utf8= Option no longer supported. Currently mapped to - nls=utf8 but please use nls=utf8 in the future and - make sure utf8 is compiled either as module or into - the kernel. See description for nls=name. - -uid= -gid= -umask= Provide default owner, group, and access mode mask. - These options work as documented in mount(8). By - default, the files/directories are owned by root and - he/she has read and write permissions, as well as - browse permission for directories. No one else has any - access permissions. I.e. the mode on all files is by - default rw------- and for directories rwx------, a - consequence of the default fmask=0177 and dmask=0077. - Using a umask of zero will grant all permissions to - everyone, i.e. all files and directories will have mode - rwxrwxrwx. - -fmask= -dmask= Instead of specifying umask which applies both to - files and directories, fmask applies only to files and - dmask only to directories. - -sloppy= If sloppy is specified, ignore unknown mount options. - Otherwise the default behaviour is to abort mount if - any unknown options are found. - -show_sys_files= If show_sys_files is specified, show the system files - in directory listings. Otherwise the default behaviour - is to hide the system files. - Note that even when show_sys_files is specified, "$MFT" - will not be visible due to bugs/mis-features in glibc. - Further, note that irrespective of show_sys_files, all - files are accessible by name, i.e. you can always do - "ls -l \$UpCase" for example to specifically show the - system file containing the Unicode upcase table. - -case_sensitive= If case_sensitive is specified, treat all file names as - case sensitive and create file names in the POSIX - namespace. Otherwise the default behaviour is to treat - file names as case insensitive and to create file names - in the WIN32/LONG name space. Note, the Linux NTFS - driver will never create short file names and will - remove them on rename/delete of the corresponding long - file name. - Note that files remain accessible via their short file - name, if it exists. If case_sensitive, you will need - to provide the correct case of the short file name. - -disable_sparse= If disable_sparse is specified, creation of sparse - regions, i.e. holes, inside files is disabled for the - volume (for the duration of this mount only). By - default, creation of sparse regions is enabled, which - is consistent with the behaviour of traditional Unix - filesystems. - -errors=opt What to do when critical filesystem errors are found. - Following values can be used for "opt": - continue: DEFAULT, try to clean-up as much as - possible, e.g. marking a corrupt inode as - bad so it is no longer accessed, and then - continue. - recover: At present only supported is recovery of - the boot sector from the backup copy. - If read-only mount, the recovery is done - in memory only and not written to disk. - Note that the options are additive, i.e. specifying: - errors=continue,errors=recover - means the driver will attempt to recover and if that - fails it will clean-up as much as possible and - continue. - -mft_zone_multiplier= Set the MFT zone multiplier for the volume (this - setting is not persistent across mounts and can be - changed from mount to mount but cannot be changed on - remount). Values of 1 to 4 are allowed, 1 being the - default. The MFT zone multiplier determines how much - space is reserved for the MFT on the volume. If all - other space is used up, then the MFT zone will be - shrunk dynamically, so this has no impact on the - amount of free space. However, it can have an impact - on performance by affecting fragmentation of the MFT. - In general use the default. If you have a lot of small - files then use a higher value. The values have the - following meaning: - Value MFT zone size (% of volume size) - 1 12.5% - 2 25% - 3 37.5% - 4 50% - Note this option is irrelevant for read-only mounts. - - -Known bugs and (mis-)features -============================= - -- The link count on each directory inode entry is set to 1, due to Linux not - supporting directory hard links. This may well confuse some user space - applications, since the directory names will have the same inode numbers. - This also speeds up ntfs_read_inode() immensely. And we haven't found any - problems with this approach so far. If you find a problem with this, please - let us know. - - -Please send bug reports/comments/feedback/abuse to the Linux-NTFS development -list at sourceforge: linux-ntfs-dev@lists.sourceforge.net - - -Using NTFS volume and stripe sets -================================= - -For support of volume and stripe sets, you can either use the kernel's -Device-Mapper driver or the kernel's Software RAID / MD driver. The former is -the recommended one to use for linear raid. But the latter is required for -raid level 5. For striping and mirroring, either driver should work fine. - - -The Device-Mapper driver ------------------------- - -You will need to create a table of the components of the volume/stripe set and -how they fit together and load this into the kernel using the dmsetup utility -(see man 8 dmsetup). - -Linear volume sets, i.e. linear raid, has been tested and works fine. Even -though untested, there is no reason why stripe sets, i.e. raid level 0, and -mirrors, i.e. raid level 1 should not work, too. Stripes with parity, i.e. -raid level 5, unfortunately cannot work yet because the current version of the -Device-Mapper driver does not support raid level 5. You may be able to use the -Software RAID / MD driver for raid level 5, see the next section for details. - -To create the table describing your volume you will need to know each of its -components and their sizes in sectors, i.e. multiples of 512-byte blocks. - -For NT4 fault tolerant volumes you can obtain the sizes using fdisk. So for -example if one of your partitions is /dev/hda2 you would do: - -$ fdisk -ul /dev/hda - -Disk /dev/hda: 81.9 GB, 81964302336 bytes -255 heads, 63 sectors/track, 9964 cylinders, total 160086528 sectors -Units = sectors of 1 * 512 = 512 bytes - - Device Boot Start End Blocks Id System - /dev/hda1 * 63 4209029 2104483+ 83 Linux - /dev/hda2 4209030 37768814 16779892+ 86 NTFS - /dev/hda3 37768815 46170809 4200997+ 83 Linux - -And you would know that /dev/hda2 has a size of 37768814 - 4209030 + 1 = -33559785 sectors. - -For Win2k and later dynamic disks, you can for example use the ldminfo utility -which is part of the Linux LDM tools (the latest version at the time of -writing is linux-ldm-0.0.8.tar.bz2). You can download it from: - http://www.linux-ntfs.org/ -Simply extract the downloaded archive (tar xvjf linux-ldm-0.0.8.tar.bz2), go -into it (cd linux-ldm-0.0.8) and change to the test directory (cd test). You -will find the precompiled (i386) ldminfo utility there. NOTE: You will not be -able to compile this yourself easily so use the binary version! - -Then you would use ldminfo in dump mode to obtain the necessary information: - -$ ./ldminfo --dump /dev/hda - -This would dump the LDM database found on /dev/hda which describes all of your -dynamic disks and all the volumes on them. At the bottom you will see the -VOLUME DEFINITIONS section which is all you really need. You may need to look -further above to determine which of the disks in the volume definitions is -which device in Linux. Hint: Run ldminfo on each of your dynamic disks and -look at the Disk Id close to the top of the output for each (the PRIVATE HEADER -section). You can then find these Disk Ids in the VBLK DATABASE section in the - components where you will get the LDM Name for the disk that is found in -the VOLUME DEFINITIONS section. - -Note you will also need to enable the LDM driver in the Linux kernel. If your -distribution did not enable it, you will need to recompile the kernel with it -enabled. This will create the LDM partitions on each device at boot time. You -would then use those devices (for /dev/hda they would be /dev/hda1, 2, 3, etc) -in the Device-Mapper table. - -You can also bypass using the LDM driver by using the main device (e.g. -/dev/hda) and then using the offsets of the LDM partitions into this device as -the "Start sector of device" when creating the table. Once again ldminfo would -give you the correct information to do this. - -Assuming you know all your devices and their sizes things are easy. - -For a linear raid the table would look like this (note all values are in -512-byte sectors): - ---- cut here --- -# Offset into Size of this Raid type Device Start sector -# volume device of device -0 1028161 linear /dev/hda1 0 -1028161 3903762 linear /dev/hdb2 0 -4931923 2103211 linear /dev/hdc1 0 ---- cut here --- - -For a striped volume, i.e. raid level 0, you will need to know the chunk size -you used when creating the volume. Windows uses 64kiB as the default, so it -will probably be this unless you changes the defaults when creating the array. - -For a raid level 0 the table would look like this (note all values are in -512-byte sectors): - ---- cut here --- -# Offset Size Raid Number Chunk 1st Start 2nd Start -# into of the type of size Device in Device in -# volume volume stripes device device -0 2056320 striped 2 128 /dev/hda1 0 /dev/hdb1 0 ---- cut here --- - -If there are more than two devices, just add each of them to the end of the -line. - -Finally, for a mirrored volume, i.e. raid level 1, the table would look like -this (note all values are in 512-byte sectors): - ---- cut here --- -# Ofs Size Raid Log Number Region Should Number Source Start Target Start -# in of the type type of log size sync? of Device in Device in -# vol volume params mirrors Device Device -0 2056320 mirror core 2 16 nosync 2 /dev/hda1 0 /dev/hdb1 0 ---- cut here --- - -If you are mirroring to multiple devices you can specify further targets at the -end of the line. - -Note the "Should sync?" parameter "nosync" means that the two mirrors are -already in sync which will be the case on a clean shutdown of Windows. If the -mirrors are not clean, you can specify the "sync" option instead of "nosync" -and the Device-Mapper driver will then copy the entirety of the "Source Device" -to the "Target Device" or if you specified multiple target devices to all of -them. - -Once you have your table, save it in a file somewhere (e.g. /etc/ntfsvolume1), -and hand it over to dmsetup to work with, like so: - -$ dmsetup create myvolume1 /etc/ntfsvolume1 - -You can obviously replace "myvolume1" with whatever name you like. - -If it all worked, you will now have the device /dev/device-mapper/myvolume1 -which you can then just use as an argument to the mount command as usual to -mount the ntfs volume. For example: - -$ mount -t ntfs -o ro /dev/device-mapper/myvolume1 /mnt/myvol1 - -(You need to create the directory /mnt/myvol1 first and of course you can use -anything you like instead of /mnt/myvol1 as long as it is an existing -directory.) - -It is advisable to do the mount read-only to see if the volume has been setup -correctly to avoid the possibility of causing damage to the data on the ntfs -volume. - - -The Software RAID / MD driver ------------------------------ - -An alternative to using the Device-Mapper driver is to use the kernel's -Software RAID / MD driver. For which you need to set up your /etc/raidtab -appropriately (see man 5 raidtab). - -Linear volume sets, i.e. linear raid, as well as stripe sets, i.e. raid level -0, have been tested and work fine (though see section "Limitations when using -the MD driver with NTFS volumes" especially if you want to use linear raid). -Even though untested, there is no reason why mirrors, i.e. raid level 1, and -stripes with parity, i.e. raid level 5, should not work, too. - -You have to use the "persistent-superblock 0" option for each raid-disk in the -NTFS volume/stripe you are configuring in /etc/raidtab as the persistent -superblock used by the MD driver would damage the NTFS volume. - -Windows by default uses a stripe chunk size of 64k, so you probably want the -"chunk-size 64k" option for each raid-disk, too. - -For example, if you have a stripe set consisting of two partitions /dev/hda5 -and /dev/hdb1 your /etc/raidtab would look like this: - -raiddev /dev/md0 - raid-level 0 - nr-raid-disks 2 - nr-spare-disks 0 - persistent-superblock 0 - chunk-size 64k - device /dev/hda5 - raid-disk 0 - device /dev/hdb1 - raid-disk 1 - -For linear raid, just change the raid-level above to "raid-level linear", for -mirrors, change it to "raid-level 1", and for stripe sets with parity, change -it to "raid-level 5". - -Note for stripe sets with parity you will also need to tell the MD driver -which parity algorithm to use by specifying the option "parity-algorithm -which", where you need to replace "which" with the name of the algorithm to -use (see man 5 raidtab for available algorithms) and you will have to try the -different available algorithms until you find one that works. Make sure you -are working read-only when playing with this as you may damage your data -otherwise. If you find which algorithm works please let us know (email the -linux-ntfs developers list linux-ntfs-dev@lists.sourceforge.net or drop in on -IRC in channel #ntfs on the irc.freenode.net network) so we can update this -documentation. - -Once the raidtab is setup, run for example raid0run -a to start all devices or -raid0run /dev/md0 to start a particular md device, in this case /dev/md0. - -Then just use the mount command as usual to mount the ntfs volume using for -example: mount -t ntfs -o ro /dev/md0 /mnt/myntfsvolume - -It is advisable to do the mount read-only to see if the md volume has been -setup correctly to avoid the possibility of causing damage to the data on the -ntfs volume. - - -Limitations when using the Software RAID / MD driver ------------------------------------------------------ - -Using the md driver will not work properly if any of your NTFS partitions have -an odd number of sectors. This is especially important for linear raid as all -data after the first partition with an odd number of sectors will be offset by -one or more sectors so if you mount such a partition with write support you -will cause massive damage to the data on the volume which will only become -apparent when you try to use the volume again under Windows. - -So when using linear raid, make sure that all your partitions have an even -number of sectors BEFORE attempting to use it. You have been warned! - -Even better is to simply use the Device-Mapper for linear raid and then you do -not have this problem with odd numbers of sectors. -- cgit v1.2.3 From 3d0c60d004644630f1431ce486e76adcc829e288 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:14 +0100 Subject: docs: filesystems: convert ocfs2-online-filecheck.txt to ReST - Add a SPDX header; - Add a document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/6007166acc3252697755836354bd29b5a5fb82aa.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + .../filesystems/ocfs2-online-filecheck.rst | 99 ++++++++++++++++++++++ .../filesystems/ocfs2-online-filecheck.txt | 94 -------------------- 3 files changed, 100 insertions(+), 94 deletions(-) create mode 100644 Documentation/filesystems/ocfs2-online-filecheck.rst delete mode 100644 Documentation/filesystems/ocfs2-online-filecheck.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 62be53c4755d..f3a26fdbd04f 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -76,6 +76,7 @@ Documentation for filesystem implementations. nilfs2 nfs/index ntfs + ocfs2-online-filecheck overlayfs virtiofs vfat diff --git a/Documentation/filesystems/ocfs2-online-filecheck.rst b/Documentation/filesystems/ocfs2-online-filecheck.rst new file mode 100644 index 000000000000..2257bb53edc1 --- /dev/null +++ b/Documentation/filesystems/ocfs2-online-filecheck.rst @@ -0,0 +1,99 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================================== +OCFS2 file system - online file check +===================================== + +This document will describe OCFS2 online file check feature. + +Introduction +============ +OCFS2 is often used in high-availability systems. However, OCFS2 usually +converts the filesystem to read-only when encounters an error. This may not be +necessary, since turning the filesystem read-only would affect other running +processes as well, decreasing availability. +Then, a mount option (errors=continue) is introduced, which would return the +-EIO errno to the calling process and terminate further processing so that the +filesystem is not corrupted further. The filesystem is not converted to +read-only, and the problematic file's inode number is reported in the kernel +log. The user can try to check/fix this file via online filecheck feature. + +Scope +===== +This effort is to check/fix small issues which may hinder day-to-day operations +of a cluster filesystem by turning the filesystem read-only. The scope of +checking/fixing is at the file level, initially for regular files and eventually +to all files (including system files) of the filesystem. + +In case of directory to file links is incorrect, the directory inode is +reported as erroneous. + +This feature is not suited for extravagant checks which involve dependency of +other components of the filesystem, such as but not limited to, checking if the +bits for file blocks in the allocation has been set. In case of such an error, +the offline fsck should/would be recommended. + +Finally, such an operation/feature should not be automated lest the filesystem +may end up with more damage than before the repair attempt. So, this has to +be performed using user interaction and consent. + +User interface +============== +When there are errors in the OCFS2 filesystem, they are usually accompanied +by the inode number which caused the error. This inode number would be the +input to check/fix the file. + +There is a sysfs directory for each OCFS2 file system mounting:: + + /sys/fs/ocfs2//filecheck + +Here, indicates the name of OCFS2 volume device which has been already +mounted. The file above would accept inode numbers. This could be used to +communicate with kernel space, tell which file(inode number) will be checked or +fixed. Currently, three operations are supported, which includes checking +inode, fixing inode and setting the size of result record history. + +1. If you want to know what error exactly happened to before fixing, do:: + + # echo "" > /sys/fs/ocfs2//filecheck/check + # cat /sys/fs/ocfs2//filecheck/check + +The output is like this:: + + INO DONE ERROR + 39502 1 GENERATION + + lists the inode numbers. + indicates whether the operation has been finished. + says what kind of errors was found. For the detailed error numbers, + please refer to the file linux/fs/ocfs2/filecheck.h. + +2. If you determine to fix this inode, do:: + + # echo "" > /sys/fs/ocfs2//filecheck/fix + # cat /sys/fs/ocfs2//filecheck/fix + +The output is like this::: + + INO DONE ERROR + 39502 1 SUCCESS + +This time, the column indicates whether this fix is successful or not. + +3. The record cache is used to store the history of check/fix results. It's +default size is 10, and can be adjust between the range of 10 ~ 100. You can +adjust the size like this:: + + # echo "" > /sys/fs/ocfs2//filecheck/set + +Fixing stuff +============ +On receiving the inode, the filesystem would read the inode and the +file metadata. In case of errors, the filesystem would fix the errors +and report the problems it fixed in the kernel log. As a precautionary measure, +the inode must first be checked for errors before performing a final fix. + +The inode and the result history will be maintained temporarily in a +small linked list buffer which would contain the last (N) inodes +fixed/checked, the detailed errors which were fixed/checked are printed in the +kernel log. diff --git a/Documentation/filesystems/ocfs2-online-filecheck.txt b/Documentation/filesystems/ocfs2-online-filecheck.txt deleted file mode 100644 index 139fab175c8a..000000000000 --- a/Documentation/filesystems/ocfs2-online-filecheck.txt +++ /dev/null @@ -1,94 +0,0 @@ - OCFS2 online file check - ----------------------- - -This document will describe OCFS2 online file check feature. - -Introduction -============ -OCFS2 is often used in high-availability systems. However, OCFS2 usually -converts the filesystem to read-only when encounters an error. This may not be -necessary, since turning the filesystem read-only would affect other running -processes as well, decreasing availability. -Then, a mount option (errors=continue) is introduced, which would return the --EIO errno to the calling process and terminate further processing so that the -filesystem is not corrupted further. The filesystem is not converted to -read-only, and the problematic file's inode number is reported in the kernel -log. The user can try to check/fix this file via online filecheck feature. - -Scope -===== -This effort is to check/fix small issues which may hinder day-to-day operations -of a cluster filesystem by turning the filesystem read-only. The scope of -checking/fixing is at the file level, initially for regular files and eventually -to all files (including system files) of the filesystem. - -In case of directory to file links is incorrect, the directory inode is -reported as erroneous. - -This feature is not suited for extravagant checks which involve dependency of -other components of the filesystem, such as but not limited to, checking if the -bits for file blocks in the allocation has been set. In case of such an error, -the offline fsck should/would be recommended. - -Finally, such an operation/feature should not be automated lest the filesystem -may end up with more damage than before the repair attempt. So, this has to -be performed using user interaction and consent. - -User interface -============== -When there are errors in the OCFS2 filesystem, they are usually accompanied -by the inode number which caused the error. This inode number would be the -input to check/fix the file. - -There is a sysfs directory for each OCFS2 file system mounting: - - /sys/fs/ocfs2//filecheck - -Here, indicates the name of OCFS2 volume device which has been already -mounted. The file above would accept inode numbers. This could be used to -communicate with kernel space, tell which file(inode number) will be checked or -fixed. Currently, three operations are supported, which includes checking -inode, fixing inode and setting the size of result record history. - -1. If you want to know what error exactly happened to before fixing, do - - # echo "" > /sys/fs/ocfs2//filecheck/check - # cat /sys/fs/ocfs2//filecheck/check - -The output is like this: - INO DONE ERROR -39502 1 GENERATION - - lists the inode numbers. - indicates whether the operation has been finished. - says what kind of errors was found. For the detailed error numbers, -please refer to the file linux/fs/ocfs2/filecheck.h. - -2. If you determine to fix this inode, do - - # echo "" > /sys/fs/ocfs2//filecheck/fix - # cat /sys/fs/ocfs2//filecheck/fix - -The output is like this: - INO DONE ERROR -39502 1 SUCCESS - -This time, the column indicates whether this fix is successful or not. - -3. The record cache is used to store the history of check/fix results. It's -default size is 10, and can be adjust between the range of 10 ~ 100. You can -adjust the size like this: - - # echo "" > /sys/fs/ocfs2//filecheck/set - -Fixing stuff -============ -On receiving the inode, the filesystem would read the inode and the -file metadata. In case of errors, the filesystem would fix the errors -and report the problems it fixed in the kernel log. As a precautionary measure, -the inode must first be checked for errors before performing a final fix. - -The inode and the result history will be maintained temporarily in a -small linked list buffer which would contain the last (N) inodes -fixed/checked, the detailed errors which were fixed/checked are printed in the -kernel log. -- cgit v1.2.3 From fa95e087ff69468b4e452c50c3f4c59a45846b8d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:15 +0100 Subject: docs: filesystems: convert ocfs2.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: Joseph Qi Link: https://lore.kernel.org/r/e29a8120bf1d847f23fb68e915f10a7d43bed9e3.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/ocfs2.rst | 117 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/ocfs2.txt | 106 -------------------------------- 3 files changed, 118 insertions(+), 106 deletions(-) create mode 100644 Documentation/filesystems/ocfs2.rst delete mode 100644 Documentation/filesystems/ocfs2.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index f3a26fdbd04f..3b2b07491c98 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -76,6 +76,7 @@ Documentation for filesystem implementations. nilfs2 nfs/index ntfs + ocfs2 ocfs2-online-filecheck overlayfs virtiofs diff --git a/Documentation/filesystems/ocfs2.rst b/Documentation/filesystems/ocfs2.rst new file mode 100644 index 000000000000..412386bc6506 --- /dev/null +++ b/Documentation/filesystems/ocfs2.rst @@ -0,0 +1,117 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================ +OCFS2 filesystem +================ + +OCFS2 is a general purpose extent based shared disk cluster file +system with many similarities to ext3. It supports 64 bit inode +numbers, and has automatically extending metadata groups which may +also make it attractive for non-clustered use. + +You'll want to install the ocfs2-tools package in order to at least +get "mount.ocfs2" and "ocfs2_hb_ctl". + +Project web page: http://ocfs2.wiki.kernel.org +Tools git tree: https://github.com/markfasheh/ocfs2-tools +OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ + +All code copyright 2005 Oracle except when otherwise noted. + +Credits +======= + +Lots of code taken from ext3 and other projects. + +Authors in alphabetical order: + +- Joel Becker +- Zach Brown +- Mark Fasheh +- Kurt Hackel +- Tao Ma +- Sunil Mushran +- Manish Singh +- Tiger Yang + +Caveats +======= +Features which OCFS2 does not support yet: + + - Directory change notification (F_NOTIFY) + - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) + +Mount options +============= + +OCFS2 supports the following mount options: + +(*) == default + +======================= ======================================================== +barrier=1 This enables/disables barriers. barrier=0 disables it, + barrier=1 enables it. +errors=remount-ro(*) Remount the filesystem read-only on an error. +errors=panic Panic and halt the machine if an error occurs. +intr (*) Allow signals to interrupt cluster operations. +nointr Do not allow signals to interrupt cluster + operations. +noatime Do not update access time. +relatime(*) Update atime if the previous atime is older than + mtime or ctime +strictatime Always update atime, but the minimum update interval + is specified by atime_quantum. +atime_quantum=60(*) OCFS2 will not update atime unless this number + of seconds has passed since the last update. + Set to zero to always update atime. This option need + work with strictatime. +data=ordered (*) All data are forced directly out to the main file + system prior to its metadata being committed to the + journal. +data=writeback Data ordering is not preserved, data may be written + into the main file system after its metadata has been + committed to the journal. +preferred_slot=0(*) During mount, try to use this filesystem slot first. If + it is in use by another node, the first empty one found + will be chosen. Invalid values will be ignored. +commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata + every 'nrsec' seconds. The default value is 5 seconds. + This means that if you lose your power, you will lose + as much as the latest 5 seconds of work (your + filesystem will not be damaged though, thanks to the + journaling). This default value (or any low value) + will hurt performance, but it's good for data-safety. + Setting it to 0 will have the same effect as leaving + it at the default (5 seconds). + Setting it to very large values will improve + performance. +localalloc=8(*) Allows custom localalloc size in MB. If the value is too + large, the fs will silently revert it to the default. +localflocks This disables cluster aware flock. +inode64 Indicates that Ocfs2 is allowed to create inodes at + any location in the filesystem, including those which + will result in inode numbers occupying more than 32 + bits of significance. +user_xattr (*) Enables Extended User Attributes. +nouser_xattr Disables Extended User Attributes. +acl Enables POSIX Access Control Lists support. +noacl (*) Disables POSIX Access Control Lists support. +resv_level=2 (*) Set how aggressive allocation reservations will be. + Valid values are between 0 (reservations off) to 8 + (maximum space for reservations). +dir_resv_level= (*) By default, directory reservations will scale with file + reservations - users should rarely need to change this + value. If allocation reservations are turned off, this + option will have no effect. +coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode + lock will be taken to force other nodes drop cache, + therefore full cluster coherency is guaranteed even + for O_DIRECT writes. +coherency=buffered Allow concurrent O_DIRECT writes without EX lock among + nodes, which gains high performance at risk of getting + stale data on other nodes. +journal_async_commit Commit block can be written to disk without waiting + for descriptor blocks. If enabled older kernels cannot + mount the device. This will enable 'journal_checksum' + internally. +======================= ======================================================== diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt deleted file mode 100644 index 4c49e5410595..000000000000 --- a/Documentation/filesystems/ocfs2.txt +++ /dev/null @@ -1,106 +0,0 @@ -OCFS2 filesystem -================== -OCFS2 is a general purpose extent based shared disk cluster file -system with many similarities to ext3. It supports 64 bit inode -numbers, and has automatically extending metadata groups which may -also make it attractive for non-clustered use. - -You'll want to install the ocfs2-tools package in order to at least -get "mount.ocfs2" and "ocfs2_hb_ctl". - -Project web page: http://ocfs2.wiki.kernel.org -Tools git tree: https://github.com/markfasheh/ocfs2-tools -OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/ - -All code copyright 2005 Oracle except when otherwise noted. - -CREDITS: -Lots of code taken from ext3 and other projects. - -Authors in alphabetical order: -Joel Becker -Zach Brown -Mark Fasheh -Kurt Hackel -Tao Ma -Sunil Mushran -Manish Singh -Tiger Yang - -Caveats -======= -Features which OCFS2 does not support yet: - - Directory change notification (F_NOTIFY) - - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) - -Mount options -============= - -OCFS2 supports the following mount options: -(*) == default - -barrier=1 This enables/disables barriers. barrier=0 disables it, - barrier=1 enables it. -errors=remount-ro(*) Remount the filesystem read-only on an error. -errors=panic Panic and halt the machine if an error occurs. -intr (*) Allow signals to interrupt cluster operations. -nointr Do not allow signals to interrupt cluster - operations. -noatime Do not update access time. -relatime(*) Update atime if the previous atime is older than - mtime or ctime -strictatime Always update atime, but the minimum update interval - is specified by atime_quantum. -atime_quantum=60(*) OCFS2 will not update atime unless this number - of seconds has passed since the last update. - Set to zero to always update atime. This option need - work with strictatime. -data=ordered (*) All data are forced directly out to the main file - system prior to its metadata being committed to the - journal. -data=writeback Data ordering is not preserved, data may be written - into the main file system after its metadata has been - committed to the journal. -preferred_slot=0(*) During mount, try to use this filesystem slot first. If - it is in use by another node, the first empty one found - will be chosen. Invalid values will be ignored. -commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata - every 'nrsec' seconds. The default value is 5 seconds. - This means that if you lose your power, you will lose - as much as the latest 5 seconds of work (your - filesystem will not be damaged though, thanks to the - journaling). This default value (or any low value) - will hurt performance, but it's good for data-safety. - Setting it to 0 will have the same effect as leaving - it at the default (5 seconds). - Setting it to very large values will improve - performance. -localalloc=8(*) Allows custom localalloc size in MB. If the value is too - large, the fs will silently revert it to the default. -localflocks This disables cluster aware flock. -inode64 Indicates that Ocfs2 is allowed to create inodes at - any location in the filesystem, including those which - will result in inode numbers occupying more than 32 - bits of significance. -user_xattr (*) Enables Extended User Attributes. -nouser_xattr Disables Extended User Attributes. -acl Enables POSIX Access Control Lists support. -noacl (*) Disables POSIX Access Control Lists support. -resv_level=2 (*) Set how aggressive allocation reservations will be. - Valid values are between 0 (reservations off) to 8 - (maximum space for reservations). -dir_resv_level= (*) By default, directory reservations will scale with file - reservations - users should rarely need to change this - value. If allocation reservations are turned off, this - option will have no effect. -coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode - lock will be taken to force other nodes drop cache, - therefore full cluster coherency is guaranteed even - for O_DIRECT writes. -coherency=buffered Allow concurrent O_DIRECT writes without EX lock among - nodes, which gains high performance at risk of getting - stale data on other nodes. -journal_async_commit Commit block can be written to disk without waiting - for descriptor blocks. If enabled older kernels cannot - mount the device. This will enable 'journal_checksum' - internally. -- cgit v1.2.3 From 7cbb468f0c70878fe64d324790ee049c1881af7c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:16 +0100 Subject: docs: filesystems: convert omfs.txt to ReST - Add a SPDX header; - Adjust document title; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: Bob Copeland Link: https://lore.kernel.org/r/0c125c7c971d81a557ca954992b8d770a9d1e3e8.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/omfs.rst | 112 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/omfs.txt | 106 ---------------------------------- 3 files changed, 113 insertions(+), 106 deletions(-) create mode 100644 Documentation/filesystems/omfs.rst delete mode 100644 Documentation/filesystems/omfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 3b2b07491c98..fbee77175840 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -78,6 +78,7 @@ Documentation for filesystem implementations. ntfs ocfs2 ocfs2-online-filecheck + omfs overlayfs virtiofs vfat diff --git a/Documentation/filesystems/omfs.rst b/Documentation/filesystems/omfs.rst new file mode 100644 index 000000000000..4c8bb3074169 --- /dev/null +++ b/Documentation/filesystems/omfs.rst @@ -0,0 +1,112 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================ +Optimized MPEG Filesystem (OMFS) +================================ + +Overview +======== + +OMFS is a filesystem created by SonicBlue for use in the ReplayTV DVR +and Rio Karma MP3 player. The filesystem is extent-based, utilizing +block sizes from 2k to 8k, with hash-based directories. This +filesystem driver may be used to read and write disks from these +devices. + +Note, it is not recommended that this FS be used in place of a general +filesystem for your own streaming media device. Native Linux filesystems +will likely perform better. + +More information is available at: + + http://linux-karma.sf.net/ + +Various utilities, including mkomfs and omfsck, are included with +omfsprogs, available at: + + http://bobcopeland.com/karma/ + +Instructions are included in its README. + +Options +======= + +OMFS supports the following mount-time options: + + ============ ======================================== + uid=n make all files owned by specified user + gid=n make all files owned by specified group + umask=xxx set permission umask to xxx + fmask=xxx set umask to xxx for files + dmask=xxx set umask to xxx for directories + ============ ======================================== + +Disk format +=========== + +OMFS discriminates between "sysblocks" and normal data blocks. The sysblock +group consists of super block information, file metadata, directory structures, +and extents. Each sysblock has a header containing CRCs of the entire +sysblock, and may be mirrored in successive blocks on the disk. A sysblock may +have a smaller size than a data block, but since they are both addressed by the +same 64-bit block number, any remaining space in the smaller sysblock is +unused. + +Sysblock header information:: + + struct omfs_header { + __be64 h_self; /* FS block where this is located */ + __be32 h_body_size; /* size of useful data after header */ + __be16 h_crc; /* crc-ccitt of body_size bytes */ + char h_fill1[2]; + u8 h_version; /* version, always 1 */ + char h_type; /* OMFS_INODE_X */ + u8 h_magic; /* OMFS_IMAGIC */ + u8 h_check_xor; /* XOR of header bytes before this */ + __be32 h_fill2; + }; + +Files and directories are both represented by omfs_inode:: + + struct omfs_inode { + struct omfs_header i_head; /* header */ + __be64 i_parent; /* parent containing this inode */ + __be64 i_sibling; /* next inode in hash bucket */ + __be64 i_ctime; /* ctime, in milliseconds */ + char i_fill1[35]; + char i_type; /* OMFS_[DIR,FILE] */ + __be32 i_fill2; + char i_fill3[64]; + char i_name[OMFS_NAMELEN]; /* filename */ + __be64 i_size; /* size of file, in bytes */ + }; + +Directories in OMFS are implemented as a large hash table. Filenames are +hashed then prepended into the bucket list beginning at OMFS_DIR_START. +Lookup requires hashing the filename, then seeking across i_sibling pointers +until a match is found on i_name. Empty buckets are represented by block +pointers with all-1s (~0). + +A file is an omfs_inode structure followed by an extent table beginning at +OMFS_EXTENT_START:: + + struct omfs_extent_entry { + __be64 e_cluster; /* start location of a set of blocks */ + __be64 e_blocks; /* number of blocks after e_cluster */ + }; + + struct omfs_extent { + __be64 e_next; /* next extent table location */ + __be32 e_extent_count; /* total # extents in this table */ + __be32 e_fill; + struct omfs_extent_entry e_entry; /* start of extent entries */ + }; + +Each extent holds the block offset followed by number of blocks allocated to +the extent. The final extent in each table is a terminator with e_cluster +being ~0 and e_blocks being ones'-complement of the total number of blocks +in the table. + +If this table overflows, a continuation inode is written and pointed to by +e_next. These have a header but lack the rest of the inode structure. + diff --git a/Documentation/filesystems/omfs.txt b/Documentation/filesystems/omfs.txt deleted file mode 100644 index 1d0d41ff5c65..000000000000 --- a/Documentation/filesystems/omfs.txt +++ /dev/null @@ -1,106 +0,0 @@ -Optimized MPEG Filesystem (OMFS) - -Overview -======== - -OMFS is a filesystem created by SonicBlue for use in the ReplayTV DVR -and Rio Karma MP3 player. The filesystem is extent-based, utilizing -block sizes from 2k to 8k, with hash-based directories. This -filesystem driver may be used to read and write disks from these -devices. - -Note, it is not recommended that this FS be used in place of a general -filesystem for your own streaming media device. Native Linux filesystems -will likely perform better. - -More information is available at: - - http://linux-karma.sf.net/ - -Various utilities, including mkomfs and omfsck, are included with -omfsprogs, available at: - - http://bobcopeland.com/karma/ - -Instructions are included in its README. - -Options -======= - -OMFS supports the following mount-time options: - - uid=n - make all files owned by specified user - gid=n - make all files owned by specified group - umask=xxx - set permission umask to xxx - fmask=xxx - set umask to xxx for files - dmask=xxx - set umask to xxx for directories - -Disk format -=========== - -OMFS discriminates between "sysblocks" and normal data blocks. The sysblock -group consists of super block information, file metadata, directory structures, -and extents. Each sysblock has a header containing CRCs of the entire -sysblock, and may be mirrored in successive blocks on the disk. A sysblock may -have a smaller size than a data block, but since they are both addressed by the -same 64-bit block number, any remaining space in the smaller sysblock is -unused. - -Sysblock header information: - -struct omfs_header { - __be64 h_self; /* FS block where this is located */ - __be32 h_body_size; /* size of useful data after header */ - __be16 h_crc; /* crc-ccitt of body_size bytes */ - char h_fill1[2]; - u8 h_version; /* version, always 1 */ - char h_type; /* OMFS_INODE_X */ - u8 h_magic; /* OMFS_IMAGIC */ - u8 h_check_xor; /* XOR of header bytes before this */ - __be32 h_fill2; -}; - -Files and directories are both represented by omfs_inode: - -struct omfs_inode { - struct omfs_header i_head; /* header */ - __be64 i_parent; /* parent containing this inode */ - __be64 i_sibling; /* next inode in hash bucket */ - __be64 i_ctime; /* ctime, in milliseconds */ - char i_fill1[35]; - char i_type; /* OMFS_[DIR,FILE] */ - __be32 i_fill2; - char i_fill3[64]; - char i_name[OMFS_NAMELEN]; /* filename */ - __be64 i_size; /* size of file, in bytes */ -}; - -Directories in OMFS are implemented as a large hash table. Filenames are -hashed then prepended into the bucket list beginning at OMFS_DIR_START. -Lookup requires hashing the filename, then seeking across i_sibling pointers -until a match is found on i_name. Empty buckets are represented by block -pointers with all-1s (~0). - -A file is an omfs_inode structure followed by an extent table beginning at -OMFS_EXTENT_START: - -struct omfs_extent_entry { - __be64 e_cluster; /* start location of a set of blocks */ - __be64 e_blocks; /* number of blocks after e_cluster */ -}; - -struct omfs_extent { - __be64 e_next; /* next extent table location */ - __be32 e_extent_count; /* total # extents in this table */ - __be32 e_fill; - struct omfs_extent_entry e_entry; /* start of extent entries */ -}; - -Each extent holds the block offset followed by number of blocks allocated to -the extent. The final extent in each table is a terminator with e_cluster -being ~0 and e_blocks being ones'-complement of the total number of blocks -in the table. - -If this table overflows, a continuation inode is written and pointed to by -e_next. These have a header but lack the rest of the inode structure. - -- cgit v1.2.3 From 18ccb2233fc5f7c27b5be17f5b6585c2fa62d919 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:17 +0100 Subject: docs: filesystems: convert orangefs.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/6f438eeff5b029d229197a602bd9b74004fe9b63.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/orangefs.rst | 554 +++++++++++++++++++++++++++++++++ Documentation/filesystems/orangefs.txt | 529 ------------------------------- 3 files changed, 555 insertions(+), 529 deletions(-) create mode 100644 Documentation/filesystems/orangefs.rst delete mode 100644 Documentation/filesystems/orangefs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index fbee77175840..fed53f831192 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -79,6 +79,7 @@ Documentation for filesystem implementations. ocfs2 ocfs2-online-filecheck omfs + orangefs overlayfs virtiofs vfat diff --git a/Documentation/filesystems/orangefs.rst b/Documentation/filesystems/orangefs.rst new file mode 100644 index 000000000000..7d6d4cad73c4 --- /dev/null +++ b/Documentation/filesystems/orangefs.rst @@ -0,0 +1,554 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======== +ORANGEFS +======== + +OrangeFS is an LGPL userspace scale-out parallel storage system. It is ideal +for large storage problems faced by HPC, BigData, Streaming Video, +Genomics, Bioinformatics. + +Orangefs, originally called PVFS, was first developed in 1993 by +Walt Ligon and Eric Blumer as a parallel file system for Parallel +Virtual Machine (PVM) as part of a NASA grant to study the I/O patterns +of parallel programs. + +Orangefs features include: + + * Distributes file data among multiple file servers + * Supports simultaneous access by multiple clients + * Stores file data and metadata on servers using local file system + and access methods + * Userspace implementation is easy to install and maintain + * Direct MPI support + * Stateless + + +Mailing List Archives +===================== + +http://lists.orangefs.org/pipermail/devel_lists.orangefs.org/ + + +Mailing List Submissions +======================== + +devel@lists.orangefs.org + + +Documentation +============= + +http://www.orangefs.org/documentation/ + + +Userspace Filesystem Source +=========================== + +http://www.orangefs.org/download + +Orangefs versions prior to 2.9.3 would not be compatible with the +upstream version of the kernel client. + + +Running ORANGEFS On a Single Server +=================================== + +OrangeFS is usually run in large installations with multiple servers and +clients, but a complete filesystem can be run on a single machine for +development and testing. + +On Fedora, install orangefs and orangefs-server:: + + dnf -y install orangefs orangefs-server + +There is an example server configuration file in +/etc/orangefs/orangefs.conf. Change localhost to your hostname if +necessary. + +To generate a filesystem to run xfstests against, see below. + +There is an example client configuration file in /etc/pvfs2tab. It is a +single line. Uncomment it and change the hostname if necessary. This +controls clients which use libpvfs2. This does not control the +pvfs2-client-core. + +Create the filesystem:: + + pvfs2-server -f /etc/orangefs/orangefs.conf + +Start the server:: + + systemctl start orangefs-server + +Test the server:: + + pvfs2-ping -m /pvfsmnt + +Start the client. The module must be compiled in or loaded before this +point:: + + systemctl start orangefs-client + +Mount the filesystem:: + + mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt + + +Building ORANGEFS on a Single Server +==================================== + +Where OrangeFS cannot be installed from distribution packages, it may be +built from source. + +You can omit --prefix if you don't care that things are sprinkled around +in /usr/local. As of version 2.9.6, OrangeFS uses Berkeley DB by +default, we will probably be changing the default to LMDB soon. + +:: + + ./configure --prefix=/opt/ofs --with-db-backend=lmdb + + make + + make install + +Create an orangefs config file:: + + /opt/ofs/bin/pvfs2-genconfig /etc/pvfs2.conf + +Create an /etc/pvfs2tab file:: + + echo tcp://localhost:3334/orangefs /pvfsmnt pvfs2 defaults,noauto 0 0 > \ + /etc/pvfs2tab + +Create the mount point you specified in the tab file if needed:: + + mkdir /pvfsmnt + +Bootstrap the server:: + + /opt/ofs/sbin/pvfs2-server -f /etc/pvfs2.conf + +Start the server:: + + /opt/osf/sbin/pvfs2-server /etc/pvfs2.conf + +Now the server should be running. Pvfs2-ls is a simple +test to verify that the server is running:: + + /opt/ofs/bin/pvfs2-ls /pvfsmnt + +If stuff seems to be working, load the kernel module and +turn on the client core:: + + /opt/ofs/sbin/pvfs2-client -p /opt/osf/sbin/pvfs2-client-core + +Mount your filesystem:: + + mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt + + +Running xfstests +================ + +It is useful to use a scratch filesystem with xfstests. This can be +done with only one server. + +Make a second copy of the FileSystem section in the server configuration +file, which is /etc/orangefs/orangefs.conf. Change the Name to scratch. +Change the ID to something other than the ID of the first FileSystem +section (2 is usually a good choice). + +Then there are two FileSystem sections: orangefs and scratch. + +This change should be made before creating the filesystem. + +:: + + pvfs2-server -f /etc/orangefs/orangefs.conf + +To run xfstests, create /etc/xfsqa.config:: + + TEST_DIR=/orangefs + TEST_DEV=tcp://localhost:3334/orangefs + SCRATCH_MNT=/scratch + SCRATCH_DEV=tcp://localhost:3334/scratch + +Then xfstests can be run:: + + ./check -pvfs2 + + +Options +======= + +The following mount options are accepted: + + acl + Allow the use of Access Control Lists on files and directories. + + intr + Some operations between the kernel client and the user space + filesystem can be interruptible, such as changes in debug levels + and the setting of tunable parameters. + + local_lock + Enable posix locking from the perspective of "this" kernel. The + default file_operations lock action is to return ENOSYS. Posix + locking kicks in if the filesystem is mounted with -o local_lock. + Distributed locking is being worked on for the future. + + +Debugging +========= + +If you want the debug (GOSSIP) statements in a particular +source file (inode.c for example) go to syslog:: + + echo inode > /sys/kernel/debug/orangefs/kernel-debug + +No debugging (the default):: + + echo none > /sys/kernel/debug/orangefs/kernel-debug + +Debugging from several source files:: + + echo inode,dir > /sys/kernel/debug/orangefs/kernel-debug + +All debugging:: + + echo all > /sys/kernel/debug/orangefs/kernel-debug + +Get a list of all debugging keywords:: + + cat /sys/kernel/debug/orangefs/debug-help + + +Protocol between Kernel Module and Userspace +============================================ + +Orangefs is a user space filesystem and an associated kernel module. +We'll just refer to the user space part of Orangefs as "userspace" +from here on out. Orangefs descends from PVFS, and userspace code +still uses PVFS for function and variable names. Userspace typedefs +many of the important structures. Function and variable names in +the kernel module have been transitioned to "orangefs", and The Linux +Coding Style avoids typedefs, so kernel module structures that +correspond to userspace structures are not typedefed. + +The kernel module implements a pseudo device that userspace +can read from and write to. Userspace can also manipulate the +kernel module through the pseudo device with ioctl. + +The Bufmap +---------- + +At startup userspace allocates two page-size-aligned (posix_memalign) +mlocked memory buffers, one is used for IO and one is used for readdir +operations. The IO buffer is 41943040 bytes and the readdir buffer is +4194304 bytes. Each buffer contains logical chunks, or partitions, and +a pointer to each buffer is added to its own PVFS_dev_map_desc structure +which also describes its total size, as well as the size and number of +the partitions. + +A pointer to the IO buffer's PVFS_dev_map_desc structure is sent to a +mapping routine in the kernel module with an ioctl. The structure is +copied from user space to kernel space with copy_from_user and is used +to initialize the kernel module's "bufmap" (struct orangefs_bufmap), which +then contains: + + * refcnt + - a reference counter + * desc_size - PVFS2_BUFMAP_DEFAULT_DESC_SIZE (4194304) - the IO buffer's + partition size, which represents the filesystem's block size and + is used for s_blocksize in super blocks. + * desc_count - PVFS2_BUFMAP_DEFAULT_DESC_COUNT (10) - the number of + partitions in the IO buffer. + * desc_shift - log2(desc_size), used for s_blocksize_bits in super blocks. + * total_size - the total size of the IO buffer. + * page_count - the number of 4096 byte pages in the IO buffer. + * page_array - a pointer to ``page_count * (sizeof(struct page*))`` bytes + of kcalloced memory. This memory is used as an array of pointers + to each of the pages in the IO buffer through a call to get_user_pages. + * desc_array - a pointer to ``desc_count * (sizeof(struct orangefs_bufmap_desc))`` + bytes of kcalloced memory. This memory is further intialized: + + user_desc is the kernel's copy of the IO buffer's ORANGEFS_dev_map_desc + structure. user_desc->ptr points to the IO buffer. + + :: + + pages_per_desc = bufmap->desc_size / PAGE_SIZE + offset = 0 + + bufmap->desc_array[0].page_array = &bufmap->page_array[offset] + bufmap->desc_array[0].array_count = pages_per_desc = 1024 + bufmap->desc_array[0].uaddr = (user_desc->ptr) + (0 * 1024 * 4096) + offset += 1024 + . + . + . + bufmap->desc_array[9].page_array = &bufmap->page_array[offset] + bufmap->desc_array[9].array_count = pages_per_desc = 1024 + bufmap->desc_array[9].uaddr = (user_desc->ptr) + + (9 * 1024 * 4096) + offset += 1024 + + * buffer_index_array - a desc_count sized array of ints, used to + indicate which of the IO buffer's partitions are available to use. + * buffer_index_lock - a spinlock to protect buffer_index_array during update. + * readdir_index_array - a five (ORANGEFS_READDIR_DEFAULT_DESC_COUNT) element + int array used to indicate which of the readdir buffer's partitions are + available to use. + * readdir_index_lock - a spinlock to protect readdir_index_array during + update. + +Operations +---------- + +The kernel module builds an "op" (struct orangefs_kernel_op_s) when it +needs to communicate with userspace. Part of the op contains the "upcall" +which expresses the request to userspace. Part of the op eventually +contains the "downcall" which expresses the results of the request. + +The slab allocator is used to keep a cache of op structures handy. + +At init time the kernel module defines and initializes a request list +and an in_progress hash table to keep track of all the ops that are +in flight at any given time. + +Ops are stateful: + + * unknown + - op was just initialized + * waiting + - op is on request_list (upward bound) + * inprogr + - op is in progress (waiting for downcall) + * serviced + - op has matching downcall; ok + * purged + - op has to start a timer since client-core + exited uncleanly before servicing op + * given up + - submitter has given up waiting for it + +When some arbitrary userspace program needs to perform a +filesystem operation on Orangefs (readdir, I/O, create, whatever) +an op structure is initialized and tagged with a distinguishing ID +number. The upcall part of the op is filled out, and the op is +passed to the "service_operation" function. + +Service_operation changes the op's state to "waiting", puts +it on the request list, and signals the Orangefs file_operations.poll +function through a wait queue. Userspace is polling the pseudo-device +and thus becomes aware of the upcall request that needs to be read. + +When the Orangefs file_operations.read function is triggered, the +request list is searched for an op that seems ready-to-process. +The op is removed from the request list. The tag from the op and +the filled-out upcall struct are copy_to_user'ed back to userspace. + +If any of these (and some additional protocol) copy_to_users fail, +the op's state is set to "waiting" and the op is added back to +the request list. Otherwise, the op's state is changed to "in progress", +and the op is hashed on its tag and put onto the end of a list in the +in_progress hash table at the index the tag hashed to. + +When userspace has assembled the response to the upcall, it +writes the response, which includes the distinguishing tag, back to +the pseudo device in a series of io_vecs. This triggers the Orangefs +file_operations.write_iter function to find the op with the associated +tag and remove it from the in_progress hash table. As long as the op's +state is not "canceled" or "given up", its state is set to "serviced". +The file_operations.write_iter function returns to the waiting vfs, +and back to service_operation through wait_for_matching_downcall. + +Service operation returns to its caller with the op's downcall +part (the response to the upcall) filled out. + +The "client-core" is the bridge between the kernel module and +userspace. The client-core is a daemon. The client-core has an +associated watchdog daemon. If the client-core is ever signaled +to die, the watchdog daemon restarts the client-core. Even though +the client-core is restarted "right away", there is a period of +time during such an event that the client-core is dead. A dead client-core +can't be triggered by the Orangefs file_operations.poll function. +Ops that pass through service_operation during a "dead spell" can timeout +on the wait queue and one attempt is made to recycle them. Obviously, +if the client-core stays dead too long, the arbitrary userspace processes +trying to use Orangefs will be negatively affected. Waiting ops +that can't be serviced will be removed from the request list and +have their states set to "given up". In-progress ops that can't +be serviced will be removed from the in_progress hash table and +have their states set to "given up". + +Readdir and I/O ops are atypical with respect to their payloads. + + - readdir ops use the smaller of the two pre-allocated pre-partitioned + memory buffers. The readdir buffer is only available to userspace. + The kernel module obtains an index to a free partition before launching + a readdir op. Userspace deposits the results into the indexed partition + and then writes them to back to the pvfs device. + + - io (read and write) ops use the larger of the two pre-allocated + pre-partitioned memory buffers. The IO buffer is accessible from + both userspace and the kernel module. The kernel module obtains an + index to a free partition before launching an io op. The kernel module + deposits write data into the indexed partition, to be consumed + directly by userspace. Userspace deposits the results of read + requests into the indexed partition, to be consumed directly + by the kernel module. + +Responses to kernel requests are all packaged in pvfs2_downcall_t +structs. Besides a few other members, pvfs2_downcall_t contains a +union of structs, each of which is associated with a particular +response type. + +The several members outside of the union are: + + ``int32_t type`` + - type of operation. + ``int32_t status`` + - return code for the operation. + ``int64_t trailer_size`` + - 0 unless readdir operation. + ``char *trailer_buf`` + - initialized to NULL, used during readdir operations. + +The appropriate member inside the union is filled out for any +particular response. + + PVFS2_VFS_OP_FILE_IO + fill a pvfs2_io_response_t + + PVFS2_VFS_OP_LOOKUP + fill a PVFS_object_kref + + PVFS2_VFS_OP_CREATE + fill a PVFS_object_kref + + PVFS2_VFS_OP_SYMLINK + fill a PVFS_object_kref + + PVFS2_VFS_OP_GETATTR + fill in a PVFS_sys_attr_s (tons of stuff the kernel doesn't need) + fill in a string with the link target when the object is a symlink. + + PVFS2_VFS_OP_MKDIR + fill a PVFS_object_kref + + PVFS2_VFS_OP_STATFS + fill a pvfs2_statfs_response_t with useless info . It is hard for + us to know, in a timely fashion, these statistics about our + distributed network filesystem. + + PVFS2_VFS_OP_FS_MOUNT + fill a pvfs2_fs_mount_response_t which is just like a PVFS_object_kref + except its members are in a different order and "__pad1" is replaced + with "id". + + PVFS2_VFS_OP_GETXATTR + fill a pvfs2_getxattr_response_t + + PVFS2_VFS_OP_LISTXATTR + fill a pvfs2_listxattr_response_t + + PVFS2_VFS_OP_PARAM + fill a pvfs2_param_response_t + + PVFS2_VFS_OP_PERF_COUNT + fill a pvfs2_perf_count_response_t + + PVFS2_VFS_OP_FSKEY + file a pvfs2_fs_key_response_t + + PVFS2_VFS_OP_READDIR + jamb everything needed to represent a pvfs2_readdir_response_t into + the readdir buffer descriptor specified in the upcall. + +Userspace uses writev() on /dev/pvfs2-req to pass responses to the requests +made by the kernel side. + +A buffer_list containing: + + - a pointer to the prepared response to the request from the + kernel (struct pvfs2_downcall_t). + - and also, in the case of a readdir request, a pointer to a + buffer containing descriptors for the objects in the target + directory. + +... is sent to the function (PINT_dev_write_list) which performs +the writev. + +PINT_dev_write_list has a local iovec array: struct iovec io_array[10]; + +The first four elements of io_array are initialized like this for all +responses:: + + io_array[0].iov_base = address of local variable "proto_ver" (int32_t) + io_array[0].iov_len = sizeof(int32_t) + + io_array[1].iov_base = address of global variable "pdev_magic" (int32_t) + io_array[1].iov_len = sizeof(int32_t) + + io_array[2].iov_base = address of parameter "tag" (PVFS_id_gen_t) + io_array[2].iov_len = sizeof(int64_t) + + io_array[3].iov_base = address of out_downcall member (pvfs2_downcall_t) + of global variable vfs_request (vfs_request_t) + io_array[3].iov_len = sizeof(pvfs2_downcall_t) + +Readdir responses initialize the fifth element io_array like this:: + + io_array[4].iov_base = contents of member trailer_buf (char *) + from out_downcall member of global variable + vfs_request + io_array[4].iov_len = contents of member trailer_size (PVFS_size) + from out_downcall member of global variable + vfs_request + +Orangefs exploits the dcache in order to avoid sending redundant +requests to userspace. We keep object inode attributes up-to-date with +orangefs_inode_getattr. Orangefs_inode_getattr uses two arguments to +help it decide whether or not to update an inode: "new" and "bypass". +Orangefs keeps private data in an object's inode that includes a short +timeout value, getattr_time, which allows any iteration of +orangefs_inode_getattr to know how long it has been since the inode was +updated. When the object is not new (new == 0) and the bypass flag is not +set (bypass == 0) orangefs_inode_getattr returns without updating the inode +if getattr_time has not timed out. Getattr_time is updated each time the +inode is updated. + +Creation of a new object (file, dir, sym-link) includes the evaluation of +its pathname, resulting in a negative directory entry for the object. +A new inode is allocated and associated with the dentry, turning it from +a negative dentry into a "productive full member of society". Orangefs +obtains the new inode from Linux with new_inode() and associates +the inode with the dentry by sending the pair back to Linux with +d_instantiate(). + +The evaluation of a pathname for an object resolves to its corresponding +dentry. If there is no corresponding dentry, one is created for it in +the dcache. Whenever a dentry is modified or verified Orangefs stores a +short timeout value in the dentry's d_time, and the dentry will be trusted +for that amount of time. Orangefs is a network filesystem, and objects +can potentially change out-of-band with any particular Orangefs kernel module +instance, so trusting a dentry is risky. The alternative to trusting +dentries is to always obtain the needed information from userspace - at +least a trip to the client-core, maybe to the servers. Obtaining information +from a dentry is cheap, obtaining it from userspace is relatively expensive, +hence the motivation to use the dentry when possible. + +The timeout values d_time and getattr_time are jiffy based, and the +code is designed to avoid the jiffy-wrap problem:: + + "In general, if the clock may have wrapped around more than once, there + is no way to tell how much time has elapsed. However, if the times t1 + and t2 are known to be fairly close, we can reliably compute the + difference in a way that takes into account the possibility that the + clock may have wrapped between times." + +from course notes by instructor Andy Wang + diff --git a/Documentation/filesystems/orangefs.txt b/Documentation/filesystems/orangefs.txt deleted file mode 100644 index f4ba94950e3f..000000000000 --- a/Documentation/filesystems/orangefs.txt +++ /dev/null @@ -1,529 +0,0 @@ -ORANGEFS -======== - -OrangeFS is an LGPL userspace scale-out parallel storage system. It is ideal -for large storage problems faced by HPC, BigData, Streaming Video, -Genomics, Bioinformatics. - -Orangefs, originally called PVFS, was first developed in 1993 by -Walt Ligon and Eric Blumer as a parallel file system for Parallel -Virtual Machine (PVM) as part of a NASA grant to study the I/O patterns -of parallel programs. - -Orangefs features include: - - * Distributes file data among multiple file servers - * Supports simultaneous access by multiple clients - * Stores file data and metadata on servers using local file system - and access methods - * Userspace implementation is easy to install and maintain - * Direct MPI support - * Stateless - - -MAILING LIST ARCHIVES -===================== - -http://lists.orangefs.org/pipermail/devel_lists.orangefs.org/ - - -MAILING LIST SUBMISSIONS -======================== - -devel@lists.orangefs.org - - -DOCUMENTATION -============= - -http://www.orangefs.org/documentation/ - - -USERSPACE FILESYSTEM SOURCE -=========================== - -http://www.orangefs.org/download - -Orangefs versions prior to 2.9.3 would not be compatible with the -upstream version of the kernel client. - - -RUNNING ORANGEFS ON A SINGLE SERVER -=================================== - -OrangeFS is usually run in large installations with multiple servers and -clients, but a complete filesystem can be run on a single machine for -development and testing. - -On Fedora, install orangefs and orangefs-server. - -dnf -y install orangefs orangefs-server - -There is an example server configuration file in -/etc/orangefs/orangefs.conf. Change localhost to your hostname if -necessary. - -To generate a filesystem to run xfstests against, see below. - -There is an example client configuration file in /etc/pvfs2tab. It is a -single line. Uncomment it and change the hostname if necessary. This -controls clients which use libpvfs2. This does not control the -pvfs2-client-core. - -Create the filesystem. - -pvfs2-server -f /etc/orangefs/orangefs.conf - -Start the server. - -systemctl start orangefs-server - -Test the server. - -pvfs2-ping -m /pvfsmnt - -Start the client. The module must be compiled in or loaded before this -point. - -systemctl start orangefs-client - -Mount the filesystem. - -mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt - - -BUILDING ORANGEFS ON A SINGLE SERVER -==================================== - -Where OrangeFS cannot be installed from distribution packages, it may be -built from source. - -You can omit --prefix if you don't care that things are sprinkled around -in /usr/local. As of version 2.9.6, OrangeFS uses Berkeley DB by -default, we will probably be changing the default to LMDB soon. - -./configure --prefix=/opt/ofs --with-db-backend=lmdb - -make - -make install - -Create an orangefs config file. - -/opt/ofs/bin/pvfs2-genconfig /etc/pvfs2.conf - -Create an /etc/pvfs2tab file. - -echo tcp://localhost:3334/orangefs /pvfsmnt pvfs2 defaults,noauto 0 0 > \ - /etc/pvfs2tab - -Create the mount point you specified in the tab file if needed. - -mkdir /pvfsmnt - -Bootstrap the server. - -/opt/ofs/sbin/pvfs2-server -f /etc/pvfs2.conf - -Start the server. - -/opt/osf/sbin/pvfs2-server /etc/pvfs2.conf - -Now the server should be running. Pvfs2-ls is a simple -test to verify that the server is running. - -/opt/ofs/bin/pvfs2-ls /pvfsmnt - -If stuff seems to be working, load the kernel module and -turn on the client core. - -/opt/ofs/sbin/pvfs2-client -p /opt/osf/sbin/pvfs2-client-core - -Mount your filesystem. - -mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt - - -RUNNING XFSTESTS -================ - -It is useful to use a scratch filesystem with xfstests. This can be -done with only one server. - -Make a second copy of the FileSystem section in the server configuration -file, which is /etc/orangefs/orangefs.conf. Change the Name to scratch. -Change the ID to something other than the ID of the first FileSystem -section (2 is usually a good choice). - -Then there are two FileSystem sections: orangefs and scratch. - -This change should be made before creating the filesystem. - -pvfs2-server -f /etc/orangefs/orangefs.conf - -To run xfstests, create /etc/xfsqa.config. - -TEST_DIR=/orangefs -TEST_DEV=tcp://localhost:3334/orangefs -SCRATCH_MNT=/scratch -SCRATCH_DEV=tcp://localhost:3334/scratch - -Then xfstests can be run - -./check -pvfs2 - - -OPTIONS -======= - -The following mount options are accepted: - - acl - Allow the use of Access Control Lists on files and directories. - - intr - Some operations between the kernel client and the user space - filesystem can be interruptible, such as changes in debug levels - and the setting of tunable parameters. - - local_lock - Enable posix locking from the perspective of "this" kernel. The - default file_operations lock action is to return ENOSYS. Posix - locking kicks in if the filesystem is mounted with -o local_lock. - Distributed locking is being worked on for the future. - - -DEBUGGING -========= - -If you want the debug (GOSSIP) statements in a particular -source file (inode.c for example) go to syslog: - - echo inode > /sys/kernel/debug/orangefs/kernel-debug - -No debugging (the default): - - echo none > /sys/kernel/debug/orangefs/kernel-debug - -Debugging from several source files: - - echo inode,dir > /sys/kernel/debug/orangefs/kernel-debug - -All debugging: - - echo all > /sys/kernel/debug/orangefs/kernel-debug - -Get a list of all debugging keywords: - - cat /sys/kernel/debug/orangefs/debug-help - - -PROTOCOL BETWEEN KERNEL MODULE AND USERSPACE -============================================ - -Orangefs is a user space filesystem and an associated kernel module. -We'll just refer to the user space part of Orangefs as "userspace" -from here on out. Orangefs descends from PVFS, and userspace code -still uses PVFS for function and variable names. Userspace typedefs -many of the important structures. Function and variable names in -the kernel module have been transitioned to "orangefs", and The Linux -Coding Style avoids typedefs, so kernel module structures that -correspond to userspace structures are not typedefed. - -The kernel module implements a pseudo device that userspace -can read from and write to. Userspace can also manipulate the -kernel module through the pseudo device with ioctl. - -THE BUFMAP: - -At startup userspace allocates two page-size-aligned (posix_memalign) -mlocked memory buffers, one is used for IO and one is used for readdir -operations. The IO buffer is 41943040 bytes and the readdir buffer is -4194304 bytes. Each buffer contains logical chunks, or partitions, and -a pointer to each buffer is added to its own PVFS_dev_map_desc structure -which also describes its total size, as well as the size and number of -the partitions. - -A pointer to the IO buffer's PVFS_dev_map_desc structure is sent to a -mapping routine in the kernel module with an ioctl. The structure is -copied from user space to kernel space with copy_from_user and is used -to initialize the kernel module's "bufmap" (struct orangefs_bufmap), which -then contains: - - * refcnt - a reference counter - * desc_size - PVFS2_BUFMAP_DEFAULT_DESC_SIZE (4194304) - the IO buffer's - partition size, which represents the filesystem's block size and - is used for s_blocksize in super blocks. - * desc_count - PVFS2_BUFMAP_DEFAULT_DESC_COUNT (10) - the number of - partitions in the IO buffer. - * desc_shift - log2(desc_size), used for s_blocksize_bits in super blocks. - * total_size - the total size of the IO buffer. - * page_count - the number of 4096 byte pages in the IO buffer. - * page_array - a pointer to page_count * (sizeof(struct page*)) bytes - of kcalloced memory. This memory is used as an array of pointers - to each of the pages in the IO buffer through a call to get_user_pages. - * desc_array - a pointer to desc_count * (sizeof(struct orangefs_bufmap_desc)) - bytes of kcalloced memory. This memory is further intialized: - - user_desc is the kernel's copy of the IO buffer's ORANGEFS_dev_map_desc - structure. user_desc->ptr points to the IO buffer. - - pages_per_desc = bufmap->desc_size / PAGE_SIZE - offset = 0 - - bufmap->desc_array[0].page_array = &bufmap->page_array[offset] - bufmap->desc_array[0].array_count = pages_per_desc = 1024 - bufmap->desc_array[0].uaddr = (user_desc->ptr) + (0 * 1024 * 4096) - offset += 1024 - . - . - . - bufmap->desc_array[9].page_array = &bufmap->page_array[offset] - bufmap->desc_array[9].array_count = pages_per_desc = 1024 - bufmap->desc_array[9].uaddr = (user_desc->ptr) + - (9 * 1024 * 4096) - offset += 1024 - - * buffer_index_array - a desc_count sized array of ints, used to - indicate which of the IO buffer's partitions are available to use. - * buffer_index_lock - a spinlock to protect buffer_index_array during update. - * readdir_index_array - a five (ORANGEFS_READDIR_DEFAULT_DESC_COUNT) element - int array used to indicate which of the readdir buffer's partitions are - available to use. - * readdir_index_lock - a spinlock to protect readdir_index_array during - update. - -OPERATIONS: - -The kernel module builds an "op" (struct orangefs_kernel_op_s) when it -needs to communicate with userspace. Part of the op contains the "upcall" -which expresses the request to userspace. Part of the op eventually -contains the "downcall" which expresses the results of the request. - -The slab allocator is used to keep a cache of op structures handy. - -At init time the kernel module defines and initializes a request list -and an in_progress hash table to keep track of all the ops that are -in flight at any given time. - -Ops are stateful: - - * unknown - op was just initialized - * waiting - op is on request_list (upward bound) - * inprogr - op is in progress (waiting for downcall) - * serviced - op has matching downcall; ok - * purged - op has to start a timer since client-core - exited uncleanly before servicing op - * given up - submitter has given up waiting for it - -When some arbitrary userspace program needs to perform a -filesystem operation on Orangefs (readdir, I/O, create, whatever) -an op structure is initialized and tagged with a distinguishing ID -number. The upcall part of the op is filled out, and the op is -passed to the "service_operation" function. - -Service_operation changes the op's state to "waiting", puts -it on the request list, and signals the Orangefs file_operations.poll -function through a wait queue. Userspace is polling the pseudo-device -and thus becomes aware of the upcall request that needs to be read. - -When the Orangefs file_operations.read function is triggered, the -request list is searched for an op that seems ready-to-process. -The op is removed from the request list. The tag from the op and -the filled-out upcall struct are copy_to_user'ed back to userspace. - -If any of these (and some additional protocol) copy_to_users fail, -the op's state is set to "waiting" and the op is added back to -the request list. Otherwise, the op's state is changed to "in progress", -and the op is hashed on its tag and put onto the end of a list in the -in_progress hash table at the index the tag hashed to. - -When userspace has assembled the response to the upcall, it -writes the response, which includes the distinguishing tag, back to -the pseudo device in a series of io_vecs. This triggers the Orangefs -file_operations.write_iter function to find the op with the associated -tag and remove it from the in_progress hash table. As long as the op's -state is not "canceled" or "given up", its state is set to "serviced". -The file_operations.write_iter function returns to the waiting vfs, -and back to service_operation through wait_for_matching_downcall. - -Service operation returns to its caller with the op's downcall -part (the response to the upcall) filled out. - -The "client-core" is the bridge between the kernel module and -userspace. The client-core is a daemon. The client-core has an -associated watchdog daemon. If the client-core is ever signaled -to die, the watchdog daemon restarts the client-core. Even though -the client-core is restarted "right away", there is a period of -time during such an event that the client-core is dead. A dead client-core -can't be triggered by the Orangefs file_operations.poll function. -Ops that pass through service_operation during a "dead spell" can timeout -on the wait queue and one attempt is made to recycle them. Obviously, -if the client-core stays dead too long, the arbitrary userspace processes -trying to use Orangefs will be negatively affected. Waiting ops -that can't be serviced will be removed from the request list and -have their states set to "given up". In-progress ops that can't -be serviced will be removed from the in_progress hash table and -have their states set to "given up". - -Readdir and I/O ops are atypical with respect to their payloads. - - - readdir ops use the smaller of the two pre-allocated pre-partitioned - memory buffers. The readdir buffer is only available to userspace. - The kernel module obtains an index to a free partition before launching - a readdir op. Userspace deposits the results into the indexed partition - and then writes them to back to the pvfs device. - - - io (read and write) ops use the larger of the two pre-allocated - pre-partitioned memory buffers. The IO buffer is accessible from - both userspace and the kernel module. The kernel module obtains an - index to a free partition before launching an io op. The kernel module - deposits write data into the indexed partition, to be consumed - directly by userspace. Userspace deposits the results of read - requests into the indexed partition, to be consumed directly - by the kernel module. - -Responses to kernel requests are all packaged in pvfs2_downcall_t -structs. Besides a few other members, pvfs2_downcall_t contains a -union of structs, each of which is associated with a particular -response type. - -The several members outside of the union are: - - int32_t type - type of operation. - - int32_t status - return code for the operation. - - int64_t trailer_size - 0 unless readdir operation. - - char *trailer_buf - initialized to NULL, used during readdir operations. - -The appropriate member inside the union is filled out for any -particular response. - - PVFS2_VFS_OP_FILE_IO - fill a pvfs2_io_response_t - - PVFS2_VFS_OP_LOOKUP - fill a PVFS_object_kref - - PVFS2_VFS_OP_CREATE - fill a PVFS_object_kref - - PVFS2_VFS_OP_SYMLINK - fill a PVFS_object_kref - - PVFS2_VFS_OP_GETATTR - fill in a PVFS_sys_attr_s (tons of stuff the kernel doesn't need) - fill in a string with the link target when the object is a symlink. - - PVFS2_VFS_OP_MKDIR - fill a PVFS_object_kref - - PVFS2_VFS_OP_STATFS - fill a pvfs2_statfs_response_t with useless info . It is hard for - us to know, in a timely fashion, these statistics about our - distributed network filesystem. - - PVFS2_VFS_OP_FS_MOUNT - fill a pvfs2_fs_mount_response_t which is just like a PVFS_object_kref - except its members are in a different order and "__pad1" is replaced - with "id". - - PVFS2_VFS_OP_GETXATTR - fill a pvfs2_getxattr_response_t - - PVFS2_VFS_OP_LISTXATTR - fill a pvfs2_listxattr_response_t - - PVFS2_VFS_OP_PARAM - fill a pvfs2_param_response_t - - PVFS2_VFS_OP_PERF_COUNT - fill a pvfs2_perf_count_response_t - - PVFS2_VFS_OP_FSKEY - file a pvfs2_fs_key_response_t - - PVFS2_VFS_OP_READDIR - jamb everything needed to represent a pvfs2_readdir_response_t into - the readdir buffer descriptor specified in the upcall. - -Userspace uses writev() on /dev/pvfs2-req to pass responses to the requests -made by the kernel side. - -A buffer_list containing: - - a pointer to the prepared response to the request from the - kernel (struct pvfs2_downcall_t). - - and also, in the case of a readdir request, a pointer to a - buffer containing descriptors for the objects in the target - directory. -... is sent to the function (PINT_dev_write_list) which performs -the writev. - -PINT_dev_write_list has a local iovec array: struct iovec io_array[10]; - -The first four elements of io_array are initialized like this for all -responses: - - io_array[0].iov_base = address of local variable "proto_ver" (int32_t) - io_array[0].iov_len = sizeof(int32_t) - - io_array[1].iov_base = address of global variable "pdev_magic" (int32_t) - io_array[1].iov_len = sizeof(int32_t) - - io_array[2].iov_base = address of parameter "tag" (PVFS_id_gen_t) - io_array[2].iov_len = sizeof(int64_t) - - io_array[3].iov_base = address of out_downcall member (pvfs2_downcall_t) - of global variable vfs_request (vfs_request_t) - io_array[3].iov_len = sizeof(pvfs2_downcall_t) - -Readdir responses initialize the fifth element io_array like this: - - io_array[4].iov_base = contents of member trailer_buf (char *) - from out_downcall member of global variable - vfs_request - io_array[4].iov_len = contents of member trailer_size (PVFS_size) - from out_downcall member of global variable - vfs_request - -Orangefs exploits the dcache in order to avoid sending redundant -requests to userspace. We keep object inode attributes up-to-date with -orangefs_inode_getattr. Orangefs_inode_getattr uses two arguments to -help it decide whether or not to update an inode: "new" and "bypass". -Orangefs keeps private data in an object's inode that includes a short -timeout value, getattr_time, which allows any iteration of -orangefs_inode_getattr to know how long it has been since the inode was -updated. When the object is not new (new == 0) and the bypass flag is not -set (bypass == 0) orangefs_inode_getattr returns without updating the inode -if getattr_time has not timed out. Getattr_time is updated each time the -inode is updated. - -Creation of a new object (file, dir, sym-link) includes the evaluation of -its pathname, resulting in a negative directory entry for the object. -A new inode is allocated and associated with the dentry, turning it from -a negative dentry into a "productive full member of society". Orangefs -obtains the new inode from Linux with new_inode() and associates -the inode with the dentry by sending the pair back to Linux with -d_instantiate(). - -The evaluation of a pathname for an object resolves to its corresponding -dentry. If there is no corresponding dentry, one is created for it in -the dcache. Whenever a dentry is modified or verified Orangefs stores a -short timeout value in the dentry's d_time, and the dentry will be trusted -for that amount of time. Orangefs is a network filesystem, and objects -can potentially change out-of-band with any particular Orangefs kernel module -instance, so trusting a dentry is risky. The alternative to trusting -dentries is to always obtain the needed information from userspace - at -least a trip to the client-core, maybe to the servers. Obtaining information -from a dentry is cheap, obtaining it from userspace is relatively expensive, -hence the motivation to use the dentry when possible. - -The timeout values d_time and getattr_time are jiffy based, and the -code is designed to avoid the jiffy-wrap problem: - -"In general, if the clock may have wrapped around more than once, there -is no way to tell how much time has elapsed. However, if the times t1 -and t2 are known to be fairly close, we can reliably compute the -difference in a way that takes into account the possibility that the -clock may have wrapped between times." - - from course notes by instructor Andy Wang - -- cgit v1.2.3 From c33e97efa9d9de538e5f0afe6cb07f83afcd5b68 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:18 +0100 Subject: docs: filesystems: convert proc.txt to ReST This document has a nice format! Unfortunately, not exactly ReST. So, several adjustments were required: - Add a SPDX header; - Adjust document and section titles; - Whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add table captions; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/1d113d860188de416ca3b0b97371dc2195433d5b.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/proc.rst | 2169 +++++++++++++++++++++++++++++++++++ Documentation/filesystems/proc.txt | 2047 --------------------------------- 3 files changed, 2170 insertions(+), 2047 deletions(-) create mode 100644 Documentation/filesystems/proc.rst delete mode 100644 Documentation/filesystems/proc.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index fed53f831192..671906e2fee6 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -81,5 +81,6 @@ Documentation for filesystem implementations. omfs orangefs overlayfs + proc virtiofs vfat diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst new file mode 100644 index 000000000000..38b606991065 --- /dev/null +++ b/Documentation/filesystems/proc.rst @@ -0,0 +1,2169 @@ +.. SPDX-License-Identifier: GPL-2.0 + +==================== +The /proc Filesystem +==================== + +===================== ======================================= ================ +/proc/sys Terrehon Bowden , October 7 1999 + Bodo Bauer +2.4.x update Jorge Nerin November 14 2000 +move /proc/sys Shen Feng April 1 2009 +fixes/update part 1.1 Stefani Seibold June 9 2009 +===================== ======================================= ================ + + + +.. Table of Contents + + 0 Preface + 0.1 Introduction/Credits + 0.2 Legal Stuff + + 1 Collecting System Information + 1.1 Process-Specific Subdirectories + 1.2 Kernel data + 1.3 IDE devices in /proc/ide + 1.4 Networking info in /proc/net + 1.5 SCSI info + 1.6 Parallel port info in /proc/parport + 1.7 TTY info in /proc/tty + 1.8 Miscellaneous kernel statistics in /proc/stat + 1.9 Ext4 file system parameters + + 2 Modifying System Parameters + + 3 Per-Process Parameters + 3.1 /proc//oom_adj & /proc//oom_score_adj - Adjust the oom-killer + score + 3.2 /proc//oom_score - Display current oom-killer score + 3.3 /proc//io - Display the IO accounting fields + 3.4 /proc//coredump_filter - Core dump filtering settings + 3.5 /proc//mountinfo - Information about mounts + 3.6 /proc//comm & /proc//task//comm + 3.7 /proc//task//children - Information about task children + 3.8 /proc//fdinfo/ - Information about opened file + 3.9 /proc//map_files - Information about memory mapped files + 3.10 /proc//timerslack_ns - Task timerslack value + 3.11 /proc//patch_state - Livepatch patch operation state + 3.12 /proc//arch_status - Task architecture specific information + + 4 Configuring procfs + 4.1 Mount options + +Preface +======= + +0.1 Introduction/Credits +------------------------ + +This documentation is part of a soon (or so we hope) to be released book on +the SuSE Linux distribution. As there is no complete documentation for the +/proc file system and we've used many freely available sources to write these +chapters, it seems only fair to give the work back to the Linux community. +This work is based on the 2.2.* kernel version and the upcoming 2.4.*. I'm +afraid it's still far from complete, but we hope it will be useful. As far as +we know, it is the first 'all-in-one' document about the /proc file system. It +is focused on the Intel x86 hardware, so if you are looking for PPC, ARM, +SPARC, AXP, etc., features, you probably won't find what you are looking for. +It also only covers IPv4 networking, not IPv6 nor other protocols - sorry. But +additions and patches are welcome and will be added to this document if you +mail them to Bodo. + +We'd like to thank Alan Cox, Rik van Riel, and Alexey Kuznetsov and a lot of +other people for help compiling this documentation. We'd also like to extend a +special thank you to Andi Kleen for documentation, which we relied on heavily +to create this document, as well as the additional information he provided. +Thanks to everybody else who contributed source or docs to the Linux kernel +and helped create a great piece of software... :) + +If you have any comments, corrections or additions, please don't hesitate to +contact Bodo Bauer at bb@ricochet.net. We'll be happy to add them to this +document. + +The latest version of this document is available online at +http://tldp.org/LDP/Linux-Filesystem-Hierarchy/html/proc.html + +If the above direction does not works for you, you could try the kernel +mailing list at linux-kernel@vger.kernel.org and/or try to reach me at +comandante@zaralinux.com. + +0.2 Legal Stuff +--------------- + +We don't guarantee the correctness of this document, and if you come to us +complaining about how you screwed up your system because of incorrect +documentation, we won't feel responsible... + +Chapter 1: Collecting System Information +======================================== + +In This Chapter +--------------- +* Investigating the properties of the pseudo file system /proc and its + ability to provide information on the running Linux system +* Examining /proc's structure +* Uncovering various information about the kernel and the processes running + on the system + +------------------------------------------------------------------------------ + +The proc file system acts as an interface to internal data structures in the +kernel. It can be used to obtain information about the system and to change +certain kernel parameters at runtime (sysctl). + +First, we'll take a look at the read-only parts of /proc. In Chapter 2, we +show you how you can use /proc/sys to change settings. + +1.1 Process-Specific Subdirectories +----------------------------------- + +The directory /proc contains (among other things) one subdirectory for each +process running on the system, which is named after the process ID (PID). + +The link self points to the process reading the file system. Each process +subdirectory has the entries listed in Table 1-1. + +Note that an open a file descriptor to /proc/ or to any of its +contained files or subdirectories does not prevent being reused +for some other process in the event that exits. Operations on +open /proc/ file descriptors corresponding to dead processes +never act on any new process that the kernel may, through chance, have +also assigned the process ID . Instead, operations on these FDs +usually fail with ESRCH. + +.. table:: Table 1-1: Process specific entries in /proc + + ============= =============================================================== + File Content + ============= =============================================================== + clear_refs Clears page referenced bits shown in smaps output + cmdline Command line arguments + cpu Current and last cpu in which it was executed (2.4)(smp) + cwd Link to the current working directory + environ Values of environment variables + exe Link to the executable of this process + fd Directory, which contains all file descriptors + maps Memory maps to executables and library files (2.4) + mem Memory held by this process + root Link to the root directory of this process + stat Process status + statm Process memory status information + status Process status in human readable form + wchan Present with CONFIG_KALLSYMS=y: it shows the kernel function + symbol the task is blocked in - or "0" if not blocked. + pagemap Page table + stack Report full stack trace, enable via CONFIG_STACKTRACE + smaps An extension based on maps, showing the memory consumption of + each mapping and flags associated with it + smaps_rollup Accumulated smaps stats for all mappings of the process. This + can be derived from smaps, but is faster and more convenient + numa_maps An extension based on maps, showing the memory locality and + binding policy as well as mem usage (in pages) of each mapping. + ============= =============================================================== + +For example, to get the status information of a process, all you have to do is +read the file /proc/PID/status:: + + >cat /proc/self/status + Name: cat + State: R (running) + Tgid: 5452 + Pid: 5452 + PPid: 743 + TracerPid: 0 (2.4) + Uid: 501 501 501 501 + Gid: 100 100 100 100 + FDSize: 256 + Groups: 100 14 16 + VmPeak: 5004 kB + VmSize: 5004 kB + VmLck: 0 kB + VmHWM: 476 kB + VmRSS: 476 kB + RssAnon: 352 kB + RssFile: 120 kB + RssShmem: 4 kB + VmData: 156 kB + VmStk: 88 kB + VmExe: 68 kB + VmLib: 1412 kB + VmPTE: 20 kb + VmSwap: 0 kB + HugetlbPages: 0 kB + CoreDumping: 0 + THP_enabled: 1 + Threads: 1 + SigQ: 0/28578 + SigPnd: 0000000000000000 + ShdPnd: 0000000000000000 + SigBlk: 0000000000000000 + SigIgn: 0000000000000000 + SigCgt: 0000000000000000 + CapInh: 00000000fffffeff + CapPrm: 0000000000000000 + CapEff: 0000000000000000 + CapBnd: ffffffffffffffff + CapAmb: 0000000000000000 + NoNewPrivs: 0 + Seccomp: 0 + Speculation_Store_Bypass: thread vulnerable + voluntary_ctxt_switches: 0 + nonvoluntary_ctxt_switches: 1 + +This shows you nearly the same information you would get if you viewed it with +the ps command. In fact, ps uses the proc file system to obtain its +information. But you get a more detailed view of the process by reading the +file /proc/PID/status. It fields are described in table 1-2. + +The statm file contains more detailed information about the process +memory usage. Its seven fields are explained in Table 1-3. The stat file +contains details information about the process itself. Its fields are +explained in Table 1-4. + +(for SMP CONFIG users) + +For making accounting scalable, RSS related information are handled in an +asynchronous manner and the value may not be very precise. To see a precise +snapshot of a moment, you can see /proc//smaps file and scan page table. +It's slow but very precise. + +.. table:: Table 1-2: Contents of the status files (as of 4.19) + + ========================== =================================================== + Field Content + ========================== =================================================== + Name filename of the executable + Umask file mode creation mask + State state (R is running, S is sleeping, D is sleeping + in an uninterruptible wait, Z is zombie, + T is traced or stopped) + Tgid thread group ID + Ngid NUMA group ID (0 if none) + Pid process id + PPid process id of the parent process + TracerPid PID of process tracing this process (0 if not) + Uid Real, effective, saved set, and file system UIDs + Gid Real, effective, saved set, and file system GIDs + FDSize number of file descriptor slots currently allocated + Groups supplementary group list + NStgid descendant namespace thread group ID hierarchy + NSpid descendant namespace process ID hierarchy + NSpgid descendant namespace process group ID hierarchy + NSsid descendant namespace session ID hierarchy + VmPeak peak virtual memory size + VmSize total program size + VmLck locked memory size + VmPin pinned memory size + VmHWM peak resident set size ("high water mark") + VmRSS size of memory portions. It contains the three + following parts + (VmRSS = RssAnon + RssFile + RssShmem) + RssAnon size of resident anonymous memory + RssFile size of resident file mappings + RssShmem size of resident shmem memory (includes SysV shm, + mapping of tmpfs and shared anonymous mappings) + VmData size of private data segments + VmStk size of stack segments + VmExe size of text segment + VmLib size of shared library code + VmPTE size of page table entries + VmSwap amount of swap used by anonymous private data + (shmem swap usage is not included) + HugetlbPages size of hugetlb memory portions + CoreDumping process's memory is currently being dumped + (killing the process may lead to a corrupted core) + THP_enabled process is allowed to use THP (returns 0 when + PR_SET_THP_DISABLE is set on the process + Threads number of threads + SigQ number of signals queued/max. number for queue + SigPnd bitmap of pending signals for the thread + ShdPnd bitmap of shared pending signals for the process + SigBlk bitmap of blocked signals + SigIgn bitmap of ignored signals + SigCgt bitmap of caught signals + CapInh bitmap of inheritable capabilities + CapPrm bitmap of permitted capabilities + CapEff bitmap of effective capabilities + CapBnd bitmap of capabilities bounding set + CapAmb bitmap of ambient capabilities + NoNewPrivs no_new_privs, like prctl(PR_GET_NO_NEW_PRIV, ...) + Seccomp seccomp mode, like prctl(PR_GET_SECCOMP, ...) + Speculation_Store_Bypass speculative store bypass mitigation status + Cpus_allowed mask of CPUs on which this process may run + Cpus_allowed_list Same as previous, but in "list format" + Mems_allowed mask of memory nodes allowed to this process + Mems_allowed_list Same as previous, but in "list format" + voluntary_ctxt_switches number of voluntary context switches + nonvoluntary_ctxt_switches number of non voluntary context switches + ========================== =================================================== + + +.. table:: Table 1-3: Contents of the statm files (as of 2.6.8-rc3) + + ======== =============================== ============================== + Field Content + ======== =============================== ============================== + size total program size (pages) (same as VmSize in status) + resident size of memory portions (pages) (same as VmRSS in status) + shared number of pages that are shared (i.e. backed by a file, same + as RssFile+RssShmem in status) + trs number of pages that are 'code' (not including libs; broken, + includes data segment) + lrs number of pages of library (always 0 on 2.6) + drs number of pages of data/stack (including libs; broken, + includes library text) + dt number of dirty pages (always 0 on 2.6) + ======== =============================== ============================== + + +.. table:: Table 1-4: Contents of the stat files (as of 2.6.30-rc7) + + ============= =============================================================== + Field Content + ============= =============================================================== + pid process id + tcomm filename of the executable + state state (R is running, S is sleeping, D is sleeping in an + uninterruptible wait, Z is zombie, T is traced or stopped) + ppid process id of the parent process + pgrp pgrp of the process + sid session id + tty_nr tty the process uses + tty_pgrp pgrp of the tty + flags task flags + min_flt number of minor faults + cmin_flt number of minor faults with child's + maj_flt number of major faults + cmaj_flt number of major faults with child's + utime user mode jiffies + stime kernel mode jiffies + cutime user mode jiffies with child's + cstime kernel mode jiffies with child's + priority priority level + nice nice level + num_threads number of threads + it_real_value (obsolete, always 0) + start_time time the process started after system boot + vsize virtual memory size + rss resident set memory size + rsslim current limit in bytes on the rss + start_code address above which program text can run + end_code address below which program text can run + start_stack address of the start of the main process stack + esp current value of ESP + eip current value of EIP + pending bitmap of pending signals + blocked bitmap of blocked signals + sigign bitmap of ignored signals + sigcatch bitmap of caught signals + 0 (place holder, used to be the wchan address, + use /proc/PID/wchan instead) + 0 (place holder) + 0 (place holder) + exit_signal signal to send to parent thread on exit + task_cpu which CPU the task is scheduled on + rt_priority realtime priority + policy scheduling policy (man sched_setscheduler) + blkio_ticks time spent waiting for block IO + gtime guest time of the task in jiffies + cgtime guest time of the task children in jiffies + start_data address above which program data+bss is placed + end_data address below which program data+bss is placed + start_brk address above which program heap can be expanded with brk() + arg_start address above which program command line is placed + arg_end address below which program command line is placed + env_start address above which program environment is placed + env_end address below which program environment is placed + exit_code the thread's exit_code in the form reported by the waitpid + system call + ============= =============================================================== + +The /proc/PID/maps file contains the currently mapped memory regions and +their access permissions. + +The format is:: + + address perms offset dev inode pathname + + 08048000-08049000 r-xp 00000000 03:00 8312 /opt/test + 08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test + 0804a000-0806b000 rw-p 00000000 00:00 0 [heap] + a7cb1000-a7cb2000 ---p 00000000 00:00 0 + a7cb2000-a7eb2000 rw-p 00000000 00:00 0 + a7eb2000-a7eb3000 ---p 00000000 00:00 0 + a7eb3000-a7ed5000 rw-p 00000000 00:00 0 + a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 + a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6 + a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6 + a800b000-a800e000 rw-p 00000000 00:00 0 + a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0 + a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0 + a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0 + a8024000-a8027000 rw-p 00000000 00:00 0 + a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2 + a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2 + a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2 + aff35000-aff4a000 rw-p 00000000 00:00 0 [stack] + ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso] + +where "address" is the address space in the process that it occupies, "perms" +is a set of permissions:: + + r = read + w = write + x = execute + s = shared + p = private (copy on write) + +"offset" is the offset into the mapping, "dev" is the device (major:minor), and +"inode" is the inode on that device. 0 indicates that no inode is associated +with the memory region, as the case would be with BSS (uninitialized data). +The "pathname" shows the name associated file for this mapping. If the mapping +is not associated with a file: + + ======= ==================================== + [heap] the heap of the program + [stack] the stack of the main process + [vdso] the "virtual dynamic shared object", + the kernel system call handler + ======= ==================================== + + or if empty, the mapping is anonymous. + +The /proc/PID/smaps is an extension based on maps, showing the memory +consumption for each of the process's mappings. For each mapping (aka Virtual +Memory Area, or VMA) there is a series of lines such as the following:: + + 08048000-080bc000 r-xp 00000000 03:02 13130 /bin/bash + + Size: 1084 kB + KernelPageSize: 4 kB + MMUPageSize: 4 kB + Rss: 892 kB + Pss: 374 kB + Shared_Clean: 892 kB + Shared_Dirty: 0 kB + Private_Clean: 0 kB + Private_Dirty: 0 kB + Referenced: 892 kB + Anonymous: 0 kB + LazyFree: 0 kB + AnonHugePages: 0 kB + ShmemPmdMapped: 0 kB + Shared_Hugetlb: 0 kB + Private_Hugetlb: 0 kB + Swap: 0 kB + SwapPss: 0 kB + KernelPageSize: 4 kB + MMUPageSize: 4 kB + Locked: 0 kB + THPeligible: 0 + VmFlags: rd ex mr mw me dw + +The first of these lines shows the same information as is displayed for the +mapping in /proc/PID/maps. Following lines show the size of the mapping +(size); the size of each page allocated when backing a VMA (KernelPageSize), +which is usually the same as the size in the page table entries; the page size +used by the MMU when backing a VMA (in most cases, the same as KernelPageSize); +the amount of the mapping that is currently resident in RAM (RSS); the +process' proportional share of this mapping (PSS); and the number of clean and +dirty shared and private pages in the mapping. + +The "proportional set size" (PSS) of a process is the count of pages it has +in memory, where each page is divided by the number of processes sharing it. +So if a process has 1000 pages all to itself, and 1000 shared with one other +process, its PSS will be 1500. + +Note that even a page which is part of a MAP_SHARED mapping, but has only +a single pte mapped, i.e. is currently used by only one process, is accounted +as private and not as shared. + +"Referenced" indicates the amount of memory currently marked as referenced or +accessed. + +"Anonymous" shows the amount of memory that does not belong to any file. Even +a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE +and a page is modified, the file page is replaced by a private anonymous copy. + +"LazyFree" shows the amount of memory which is marked by madvise(MADV_FREE). +The memory isn't freed immediately with madvise(). It's freed in memory +pressure if the memory is clean. Please note that the printed value might +be lower than the real value due to optimizations used in the current +implementation. If this is not desirable please file a bug report. + +"AnonHugePages" shows the ammount of memory backed by transparent hugepage. + +"ShmemPmdMapped" shows the ammount of shared (shmem/tmpfs) memory backed by +huge pages. + +"Shared_Hugetlb" and "Private_Hugetlb" show the ammounts of memory backed by +hugetlbfs page which is *not* counted in "RSS" or "PSS" field for historical +reasons. And these are not included in {Shared,Private}_{Clean,Dirty} field. + +"Swap" shows how much would-be-anonymous memory is also used, but out on swap. + +For shmem mappings, "Swap" includes also the size of the mapped (and not +replaced by copy-on-write) part of the underlying shmem object out on swap. +"SwapPss" shows proportional swap share of this mapping. Unlike "Swap", this +does not take into account swapped out page of underlying shmem objects. +"Locked" indicates whether the mapping is locked in memory or not. +"THPeligible" indicates whether the mapping is eligible for allocating THP +pages - 1 if true, 0 otherwise. It just shows the current status. + +"VmFlags" field deserves a separate description. This member represents the +kernel flags associated with the particular virtual memory area in two letter +encoded manner. The codes are the following: + + == ======================================= + rd readable + wr writeable + ex executable + sh shared + mr may read + mw may write + me may execute + ms may share + gd stack segment growns down + pf pure PFN range + dw disabled write to the mapped file + lo pages are locked in memory + io memory mapped I/O area + sr sequential read advise provided + rr random read advise provided + dc do not copy area on fork + de do not expand area on remapping + ac area is accountable + nr swap space is not reserved for the area + ht area uses huge tlb pages + ar architecture specific flag + dd do not include area into core dump + sd soft dirty flag + mm mixed map area + hg huge page advise flag + nh no huge page advise flag + mg mergable advise flag + == ======================================= + +Note that there is no guarantee that every flag and associated mnemonic will +be present in all further kernel releases. Things get changed, the flags may +be vanished or the reverse -- new added. Interpretation of their meaning +might change in future as well. So each consumer of these flags has to +follow each specific kernel version for the exact semantic. + +This file is only present if the CONFIG_MMU kernel configuration option is +enabled. + +Note: reading /proc/PID/maps or /proc/PID/smaps is inherently racy (consistent +output can be achieved only in the single read call). + +This typically manifests when doing partial reads of these files while the +memory map is being modified. Despite the races, we do provide the following +guarantees: + +1) The mapped addresses never go backwards, which implies no two + regions will ever overlap. +2) If there is something at a given vaddr during the entirety of the + life of the smaps/maps walk, there will be some output for it. + +The /proc/PID/smaps_rollup file includes the same fields as /proc/PID/smaps, +but their values are the sums of the corresponding values for all mappings of +the process. Additionally, it contains these fields: + +- Pss_Anon +- Pss_File +- Pss_Shmem + +They represent the proportional shares of anonymous, file, and shmem pages, as +described for smaps above. These fields are omitted in smaps since each +mapping identifies the type (anon, file, or shmem) of all pages it contains. +Thus all information in smaps_rollup can be derived from smaps, but at a +significantly higher cost. + +The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG +bits on both physical and virtual pages associated with a process, and the +soft-dirty bit on pte (see Documentation/admin-guide/mm/soft-dirty.rst +for details). +To clear the bits for all the pages associated with the process:: + + > echo 1 > /proc/PID/clear_refs + +To clear the bits for the anonymous pages associated with the process:: + + > echo 2 > /proc/PID/clear_refs + +To clear the bits for the file mapped pages associated with the process:: + + > echo 3 > /proc/PID/clear_refs + +To clear the soft-dirty bit:: + + > echo 4 > /proc/PID/clear_refs + +To reset the peak resident set size ("high water mark") to the process's +current value:: + + > echo 5 > /proc/PID/clear_refs + +Any other value written to /proc/PID/clear_refs will have no effect. + +The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags +using /proc/kpageflags and number of times a page is mapped using +/proc/kpagecount. For detailed explanation, see +Documentation/admin-guide/mm/pagemap.rst. + +The /proc/pid/numa_maps is an extension based on maps, showing the memory +locality and binding policy, as well as the memory usage (in pages) of +each mapping. The output follows a general format where mapping details get +summarized separated by blank spaces, one mapping per each file line:: + + address policy mapping details + + 00400000 default file=/usr/local/bin/app mapped=1 active=0 N3=1 kernelpagesize_kB=4 + 00600000 default file=/usr/local/bin/app anon=1 dirty=1 N3=1 kernelpagesize_kB=4 + 3206000000 default file=/lib64/ld-2.12.so mapped=26 mapmax=6 N0=24 N3=2 kernelpagesize_kB=4 + 320621f000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4 + 3206220000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4 + 3206221000 default anon=1 dirty=1 N3=1 kernelpagesize_kB=4 + 3206800000 default file=/lib64/libc-2.12.so mapped=59 mapmax=21 active=55 N0=41 N3=18 kernelpagesize_kB=4 + 320698b000 default file=/lib64/libc-2.12.so + 3206b8a000 default file=/lib64/libc-2.12.so anon=2 dirty=2 N3=2 kernelpagesize_kB=4 + 3206b8e000 default file=/lib64/libc-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4 + 3206b8f000 default anon=3 dirty=3 active=1 N3=3 kernelpagesize_kB=4 + 7f4dc10a2000 default anon=3 dirty=3 N3=3 kernelpagesize_kB=4 + 7f4dc10b4000 default anon=2 dirty=2 active=1 N3=2 kernelpagesize_kB=4 + 7f4dc1200000 default file=/anon_hugepage\040(deleted) huge anon=1 dirty=1 N3=1 kernelpagesize_kB=2048 + 7fff335f0000 default stack anon=3 dirty=3 N3=3 kernelpagesize_kB=4 + 7fff3369d000 default mapped=1 mapmax=35 active=0 N3=1 kernelpagesize_kB=4 + +Where: + +"address" is the starting address for the mapping; + +"policy" reports the NUMA memory policy set for the mapping (see Documentation/admin-guide/mm/numa_memory_policy.rst); + +"mapping details" summarizes mapping data such as mapping type, page usage counters, +node locality page counters (N0 == node0, N1 == node1, ...) and the kernel page +size, in KB, that is backing the mapping up. + +1.2 Kernel data +--------------- + +Similar to the process entries, the kernel data files give information about +the running kernel. The files used to obtain this information are contained in +/proc and are listed in Table 1-5. Not all of these will be present in your +system. It depends on the kernel configuration and the loaded modules, which +files are there, and which are missing. + +.. table:: Table 1-5: Kernel info in /proc + + ============ =============================================================== + File Content + ============ =============================================================== + apm Advanced power management info + buddyinfo Kernel memory allocator information (see text) (2.5) + bus Directory containing bus specific information + cmdline Kernel command line + cpuinfo Info about the CPU + devices Available devices (block and character) + dma Used DMS channels + filesystems Supported filesystems + driver Various drivers grouped here, currently rtc (2.4) + execdomains Execdomains, related to security (2.4) + fb Frame Buffer devices (2.4) + fs File system parameters, currently nfs/exports (2.4) + ide Directory containing info about the IDE subsystem + interrupts Interrupt usage + iomem Memory map (2.4) + ioports I/O port usage + irq Masks for irq to cpu affinity (2.4)(smp?) + isapnp ISA PnP (Plug&Play) Info (2.4) + kcore Kernel core image (can be ELF or A.OUT(deprecated in 2.4)) + kmsg Kernel messages + ksyms Kernel symbol table + loadavg Load average of last 1, 5 & 15 minutes + locks Kernel locks + meminfo Memory info + misc Miscellaneous + modules List of loaded modules + mounts Mounted filesystems + net Networking info (see text) + pagetypeinfo Additional page allocator information (see text) (2.5) + partitions Table of partitions known to the system + pci Deprecated info of PCI bus (new way -> /proc/bus/pci/, + decoupled by lspci (2.4) + rtc Real time clock + scsi SCSI info (see text) + slabinfo Slab pool info + softirqs softirq usage + stat Overall statistics + swaps Swap space utilization + sys See chapter 2 + sysvipc Info of SysVIPC Resources (msg, sem, shm) (2.4) + tty Info of tty drivers + uptime Wall clock since boot, combined idle time of all cpus + version Kernel version + video bttv info of video resources (2.4) + vmallocinfo Show vmalloced areas + ============ =============================================================== + +You can, for example, check which interrupts are currently in use and what +they are used for by looking in the file /proc/interrupts:: + + > cat /proc/interrupts + CPU0 + 0: 8728810 XT-PIC timer + 1: 895 XT-PIC keyboard + 2: 0 XT-PIC cascade + 3: 531695 XT-PIC aha152x + 4: 2014133 XT-PIC serial + 5: 44401 XT-PIC pcnet_cs + 8: 2 XT-PIC rtc + 11: 8 XT-PIC i82365 + 12: 182918 XT-PIC PS/2 Mouse + 13: 1 XT-PIC fpu + 14: 1232265 XT-PIC ide0 + 15: 7 XT-PIC ide1 + NMI: 0 + +In 2.4.* a couple of lines where added to this file LOC & ERR (this time is the +output of a SMP machine):: + + > cat /proc/interrupts + + CPU0 CPU1 + 0: 1243498 1214548 IO-APIC-edge timer + 1: 8949 8958 IO-APIC-edge keyboard + 2: 0 0 XT-PIC cascade + 5: 11286 10161 IO-APIC-edge soundblaster + 8: 1 0 IO-APIC-edge rtc + 9: 27422 27407 IO-APIC-edge 3c503 + 12: 113645 113873 IO-APIC-edge PS/2 Mouse + 13: 0 0 XT-PIC fpu + 14: 22491 24012 IO-APIC-edge ide0 + 15: 2183 2415 IO-APIC-edge ide1 + 17: 30564 30414 IO-APIC-level eth0 + 18: 177 164 IO-APIC-level bttv + NMI: 2457961 2457959 + LOC: 2457882 2457881 + ERR: 2155 + +NMI is incremented in this case because every timer interrupt generates a NMI +(Non Maskable Interrupt) which is used by the NMI Watchdog to detect lockups. + +LOC is the local interrupt counter of the internal APIC of every CPU. + +ERR is incremented in the case of errors in the IO-APIC bus (the bus that +connects the CPUs in a SMP system. This means that an error has been detected, +the IO-APIC automatically retry the transmission, so it should not be a big +problem, but you should read the SMP-FAQ. + +In 2.6.2* /proc/interrupts was expanded again. This time the goal was for +/proc/interrupts to display every IRQ vector in use by the system, not +just those considered 'most important'. The new vectors are: + +THR + interrupt raised when a machine check threshold counter + (typically counting ECC corrected errors of memory or cache) exceeds + a configurable threshold. Only available on some systems. + +TRM + a thermal event interrupt occurs when a temperature threshold + has been exceeded for the CPU. This interrupt may also be generated + when the temperature drops back to normal. + +SPU + a spurious interrupt is some interrupt that was raised then lowered + by some IO device before it could be fully processed by the APIC. Hence + the APIC sees the interrupt but does not know what device it came from. + For this case the APIC will generate the interrupt with a IRQ vector + of 0xff. This might also be generated by chipset bugs. + +RES, CAL, TLB] + rescheduling, call and TLB flush interrupts are + sent from one CPU to another per the needs of the OS. Typically, + their statistics are used by kernel developers and interested users to + determine the occurrence of interrupts of the given type. + +The above IRQ vectors are displayed only when relevant. For example, +the threshold vector does not exist on x86_64 platforms. Others are +suppressed when the system is a uniprocessor. As of this writing, only +i386 and x86_64 platforms support the new IRQ vector displays. + +Of some interest is the introduction of the /proc/irq directory to 2.4. +It could be used to set IRQ to CPU affinity, this means that you can "hook" an +IRQ to only one CPU, or to exclude a CPU of handling IRQs. The contents of the +irq subdir is one subdir for each IRQ, and two files; default_smp_affinity and +prof_cpu_mask. + +For example:: + + > ls /proc/irq/ + 0 10 12 14 16 18 2 4 6 8 prof_cpu_mask + 1 11 13 15 17 19 3 5 7 9 default_smp_affinity + > ls /proc/irq/0/ + smp_affinity + +smp_affinity is a bitmask, in which you can specify which CPUs can handle the +IRQ, you can set it by doing:: + + > echo 1 > /proc/irq/10/smp_affinity + +This means that only the first CPU will handle the IRQ, but you can also echo +5 which means that only the first and third CPU can handle the IRQ. + +The contents of each smp_affinity file is the same by default:: + + > cat /proc/irq/0/smp_affinity + ffffffff + +There is an alternate interface, smp_affinity_list which allows specifying +a cpu range instead of a bitmask:: + + > cat /proc/irq/0/smp_affinity_list + 1024-1031 + +The default_smp_affinity mask applies to all non-active IRQs, which are the +IRQs which have not yet been allocated/activated, and hence which lack a +/proc/irq/[0-9]* directory. + +The node file on an SMP system shows the node to which the device using the IRQ +reports itself as being attached. This hardware locality information does not +include information about any possible driver locality preference. + +prof_cpu_mask specifies which CPUs are to be profiled by the system wide +profiler. Default value is ffffffff (all cpus if there are only 32 of them). + +The way IRQs are routed is handled by the IO-APIC, and it's Round Robin +between all the CPUs which are allowed to handle it. As usual the kernel has +more info than you and does a better job than you, so the defaults are the +best choice for almost everyone. [Note this applies only to those IO-APIC's +that support "Round Robin" interrupt distribution.] + +There are three more important subdirectories in /proc: net, scsi, and sys. +The general rule is that the contents, or even the existence of these +directories, depend on your kernel configuration. If SCSI is not enabled, the +directory scsi may not exist. The same is true with the net, which is there +only when networking support is present in the running kernel. + +The slabinfo file gives information about memory usage at the slab level. +Linux uses slab pools for memory management above page level in version 2.2. +Commonly used objects have their own slab pool (such as network buffers, +directory cache, and so on). + +:: + + > cat /proc/buddyinfo + + Node 0, zone DMA 0 4 5 4 4 3 ... + Node 0, zone Normal 1 0 0 1 101 8 ... + Node 0, zone HighMem 2 0 0 1 1 0 ... + +External fragmentation is a problem under some workloads, and buddyinfo is a +useful tool for helping diagnose these problems. Buddyinfo will give you a +clue as to how big an area you can safely allocate, or why a previous +allocation failed. + +Each column represents the number of pages of a certain order which are +available. In this case, there are 0 chunks of 2^0*PAGE_SIZE available in +ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE +available in ZONE_NORMAL, etc... + +More information relevant to external fragmentation can be found in +pagetypeinfo:: + + > cat /proc/pagetypeinfo + Page block order: 9 + Pages per block: 512 + + Free pages count per migrate type at order 0 1 2 3 4 5 6 7 8 9 10 + Node 0, zone DMA, type Unmovable 0 0 0 1 1 1 1 1 1 1 0 + Node 0, zone DMA, type Reclaimable 0 0 0 0 0 0 0 0 0 0 0 + Node 0, zone DMA, type Movable 1 1 2 1 2 1 1 0 1 0 2 + Node 0, zone DMA, type Reserve 0 0 0 0 0 0 0 0 0 1 0 + Node 0, zone DMA, type Isolate 0 0 0 0 0 0 0 0 0 0 0 + Node 0, zone DMA32, type Unmovable 103 54 77 1 1 1 11 8 7 1 9 + Node 0, zone DMA32, type Reclaimable 0 0 2 1 0 0 0 0 1 0 0 + Node 0, zone DMA32, type Movable 169 152 113 91 77 54 39 13 6 1 452 + Node 0, zone DMA32, type Reserve 1 2 2 2 2 0 1 1 1 1 0 + Node 0, zone DMA32, type Isolate 0 0 0 0 0 0 0 0 0 0 0 + + Number of blocks type Unmovable Reclaimable Movable Reserve Isolate + Node 0, zone DMA 2 0 5 1 0 + Node 0, zone DMA32 41 6 967 2 0 + +Fragmentation avoidance in the kernel works by grouping pages of different +migrate types into the same contiguous regions of memory called page blocks. +A page block is typically the size of the default hugepage size e.g. 2MB on +X86-64. By keeping pages grouped based on their ability to move, the kernel +can reclaim pages within a page block to satisfy a high-order allocation. + +The pagetypinfo begins with information on the size of a page block. It +then gives the same type of information as buddyinfo except broken down +by migrate-type and finishes with details on how many page blocks of each +type exist. + +If min_free_kbytes has been tuned correctly (recommendations made by hugeadm +from libhugetlbfs https://github.com/libhugetlbfs/libhugetlbfs/), one can +make an estimate of the likely number of huge pages that can be allocated +at a given point in time. All the "Movable" blocks should be allocatable +unless memory has been mlock()'d. Some of the Reclaimable blocks should +also be allocatable although a lot of filesystem metadata may have to be +reclaimed to achieve this. + + +meminfo +~~~~~~~ + +Provides information about distribution and utilization of memory. This +varies by architecture and compile options. The following is from a +16GB PIII, which has highmem enabled. You may not have all of these fields. + +:: + + > cat /proc/meminfo + + MemTotal: 16344972 kB + MemFree: 13634064 kB + MemAvailable: 14836172 kB + Buffers: 3656 kB + Cached: 1195708 kB + SwapCached: 0 kB + Active: 891636 kB + Inactive: 1077224 kB + HighTotal: 15597528 kB + HighFree: 13629632 kB + LowTotal: 747444 kB + LowFree: 4432 kB + SwapTotal: 0 kB + SwapFree: 0 kB + Dirty: 968 kB + Writeback: 0 kB + AnonPages: 861800 kB + Mapped: 280372 kB + Shmem: 644 kB + KReclaimable: 168048 kB + Slab: 284364 kB + SReclaimable: 159856 kB + SUnreclaim: 124508 kB + PageTables: 24448 kB + NFS_Unstable: 0 kB + Bounce: 0 kB + WritebackTmp: 0 kB + CommitLimit: 7669796 kB + Committed_AS: 100056 kB + VmallocTotal: 112216 kB + VmallocUsed: 428 kB + VmallocChunk: 111088 kB + Percpu: 62080 kB + HardwareCorrupted: 0 kB + AnonHugePages: 49152 kB + ShmemHugePages: 0 kB + ShmemPmdMapped: 0 kB + +MemTotal + Total usable ram (i.e. physical ram minus a few reserved + bits and the kernel binary code) +MemFree + The sum of LowFree+HighFree +MemAvailable + An estimate of how much memory is available for starting new + applications, without swapping. Calculated from MemFree, + SReclaimable, the size of the file LRU lists, and the low + watermarks in each zone. + The estimate takes into account that the system needs some + page cache to function well, and that not all reclaimable + slab will be reclaimable, due to items being in use. The + impact of those factors will vary from system to system. +Buffers + Relatively temporary storage for raw disk blocks + shouldn't get tremendously large (20MB or so) +Cached + in-memory cache for files read from the disk (the + pagecache). Doesn't include SwapCached +SwapCached + Memory that once was swapped out, is swapped back in but + still also is in the swapfile (if memory is needed it + doesn't need to be swapped out AGAIN because it is already + in the swapfile. This saves I/O) +Active + Memory that has been used more recently and usually not + reclaimed unless absolutely necessary. +Inactive + Memory which has been less recently used. It is more + eligible to be reclaimed for other purposes +HighTotal, HighFree + Highmem is all memory above ~860MB of physical memory + Highmem areas are for use by userspace programs, or + for the pagecache. The kernel must use tricks to access + this memory, making it slower to access than lowmem. +LowTotal, LowFree + Lowmem is memory which can be used for everything that + highmem can be used for, but it is also available for the + kernel's use for its own data structures. Among many + other things, it is where everything from the Slab is + allocated. Bad things happen when you're out of lowmem. +SwapTotal + total amount of swap space available +SwapFree + Memory which has been evicted from RAM, and is temporarily + on the disk +Dirty + Memory which is waiting to get written back to the disk +Writeback + Memory which is actively being written back to the disk +AnonPages + Non-file backed pages mapped into userspace page tables +HardwareCorrupted + The amount of RAM/memory in KB, the kernel identifies as + corrupted. +AnonHugePages + Non-file backed huge pages mapped into userspace page tables +Mapped + files which have been mmaped, such as libraries +Shmem + Total memory used by shared memory (shmem) and tmpfs +ShmemHugePages + Memory used by shared memory (shmem) and tmpfs allocated + with huge pages +ShmemPmdMapped + Shared memory mapped into userspace with huge pages +KReclaimable + Kernel allocations that the kernel will attempt to reclaim + under memory pressure. Includes SReclaimable (below), and other + direct allocations with a shrinker. +Slab + in-kernel data structures cache +SReclaimable + Part of Slab, that might be reclaimed, such as caches +SUnreclaim + Part of Slab, that cannot be reclaimed on memory pressure +PageTables + amount of memory dedicated to the lowest level of page + tables. +NFS_Unstable + NFS pages sent to the server, but not yet committed to stable + storage +Bounce + Memory used for block device "bounce buffers" +WritebackTmp + Memory used by FUSE for temporary writeback buffers +CommitLimit + Based on the overcommit ratio ('vm.overcommit_ratio'), + this is the total amount of memory currently available to + be allocated on the system. This limit is only adhered to + if strict overcommit accounting is enabled (mode 2 in + 'vm.overcommit_memory'). + + The CommitLimit is calculated with the following formula:: + + CommitLimit = ([total RAM pages] - [total huge TLB pages]) * + overcommit_ratio / 100 + [total swap pages] + + For example, on a system with 1G of physical RAM and 7G + of swap with a `vm.overcommit_ratio` of 30 it would + yield a CommitLimit of 7.3G. + + For more details, see the memory overcommit documentation + in vm/overcommit-accounting. +Committed_AS + The amount of memory presently allocated on the system. + The committed memory is a sum of all of the memory which + has been allocated by processes, even if it has not been + "used" by them as of yet. A process which malloc()'s 1G + of memory, but only touches 300M of it will show up as + using 1G. This 1G is memory which has been "committed" to + by the VM and can be used at any time by the allocating + application. With strict overcommit enabled on the system + (mode 2 in 'vm.overcommit_memory'),allocations which would + exceed the CommitLimit (detailed above) will not be permitted. + This is useful if one needs to guarantee that processes will + not fail due to lack of memory once that memory has been + successfully allocated. +VmallocTotal + total size of vmalloc memory area +VmallocUsed + amount of vmalloc area which is used +VmallocChunk + largest contiguous block of vmalloc area which is free +Percpu + Memory allocated to the percpu allocator used to back percpu + allocations. This stat excludes the cost of metadata. + +vmallocinfo +~~~~~~~~~~~ + +Provides information about vmalloced/vmaped areas. One line per area, +containing the virtual address range of the area, size in bytes, +caller information of the creator, and optional information depending +on the kind of area : + + ========== =================================================== + pages=nr number of pages + phys=addr if a physical address was specified + ioremap I/O mapping (ioremap() and friends) + vmalloc vmalloc() area + vmap vmap()ed pages + user VM_USERMAP area + vpages buffer for pages pointers was vmalloced (huge area) + N=nr (Only on NUMA kernels) + Number of pages allocated on memory node + ========== =================================================== + +:: + + > cat /proc/vmallocinfo + 0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204 ... + /0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128 + 0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204 ... + /0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64 + 0xffffc20000302000-0xffffc20000304000 8192 acpi_tb_verify_table+0x21/0x4f... + phys=7fee8000 ioremap + 0xffffc20000304000-0xffffc20000307000 12288 acpi_tb_verify_table+0x21/0x4f... + phys=7fee7000 ioremap + 0xffffc2000031d000-0xffffc2000031f000 8192 init_vdso_vars+0x112/0x210 + 0xffffc2000031f000-0xffffc2000032b000 49152 cramfs_uncompress_init+0x2e ... + /0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3 + 0xffffc2000033a000-0xffffc2000033d000 12288 sys_swapon+0x640/0xac0 ... + pages=2 vmalloc N1=2 + 0xffffc20000347000-0xffffc2000034c000 20480 xt_alloc_table_info+0xfe ... + /0x130 [x_tables] pages=4 vmalloc N0=4 + 0xffffffffa0000000-0xffffffffa000f000 61440 sys_init_module+0xc27/0x1d00 ... + pages=14 vmalloc N2=14 + 0xffffffffa000f000-0xffffffffa0014000 20480 sys_init_module+0xc27/0x1d00 ... + pages=4 vmalloc N1=4 + 0xffffffffa0014000-0xffffffffa0017000 12288 sys_init_module+0xc27/0x1d00 ... + pages=2 vmalloc N1=2 + 0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ... + pages=10 vmalloc N0=10 + + +softirqs +~~~~~~~~ + +Provides counts of softirq handlers serviced since boot time, for each cpu. + +:: + + > cat /proc/softirqs + CPU0 CPU1 CPU2 CPU3 + HI: 0 0 0 0 + TIMER: 27166 27120 27097 27034 + NET_TX: 0 0 0 17 + NET_RX: 42 0 0 39 + BLOCK: 0 0 107 1121 + TASKLET: 0 0 0 290 + SCHED: 27035 26983 26971 26746 + HRTIMER: 0 0 0 0 + RCU: 1678 1769 2178 2250 + + +1.3 IDE devices in /proc/ide +---------------------------- + +The subdirectory /proc/ide contains information about all IDE devices of which +the kernel is aware. There is one subdirectory for each IDE controller, the +file drivers and a link for each IDE device, pointing to the device directory +in the controller specific subtree. + +The file drivers contains general information about the drivers used for the +IDE devices:: + + > cat /proc/ide/drivers + ide-cdrom version 4.53 + ide-disk version 1.08 + +More detailed information can be found in the controller specific +subdirectories. These are named ide0, ide1 and so on. Each of these +directories contains the files shown in table 1-6. + + +.. table:: Table 1-6: IDE controller info in /proc/ide/ide? + + ======= ======================================= + File Content + ======= ======================================= + channel IDE channel (0 or 1) + config Configuration (only for PCI/IDE bridge) + mate Mate name + model Type/Chipset of IDE controller + ======= ======================================= + +Each device connected to a controller has a separate subdirectory in the +controllers directory. The files listed in table 1-7 are contained in these +directories. + + +.. table:: Table 1-7: IDE device information + + ================ ========================================== + File Content + ================ ========================================== + cache The cache + capacity Capacity of the medium (in 512Byte blocks) + driver driver and version + geometry physical and logical geometry + identify device identify block + media media type + model device identifier + settings device setup + smart_thresholds IDE disk management thresholds + smart_values IDE disk management values + ================ ========================================== + +The most interesting file is ``settings``. This file contains a nice +overview of the drive parameters:: + + # cat /proc/ide/ide0/hda/settings + name value min max mode + ---- ----- --- --- ---- + bios_cyl 526 0 65535 rw + bios_head 255 0 255 rw + bios_sect 63 0 63 rw + breada_readahead 4 0 127 rw + bswap 0 0 1 r + file_readahead 72 0 2097151 rw + io_32bit 0 0 3 rw + keepsettings 0 0 1 rw + max_kb_per_request 122 1 127 rw + multcount 0 0 8 rw + nice1 1 0 1 rw + nowerr 0 0 1 rw + pio_mode write-only 0 255 w + slow 0 0 1 rw + unmaskirq 0 0 1 rw + using_dma 0 0 1 rw + + +1.4 Networking info in /proc/net +-------------------------------- + +The subdirectory /proc/net follows the usual pattern. Table 1-8 shows the +additional values you get for IP version 6 if you configure the kernel to +support this. Table 1-9 lists the files and their meaning. + + +.. table:: Table 1-8: IPv6 info in /proc/net + + ========== ===================================================== + File Content + ========== ===================================================== + udp6 UDP sockets (IPv6) + tcp6 TCP sockets (IPv6) + raw6 Raw device statistics (IPv6) + igmp6 IP multicast addresses, which this host joined (IPv6) + if_inet6 List of IPv6 interface addresses + ipv6_route Kernel routing table for IPv6 + rt6_stats Global IPv6 routing tables statistics + sockstat6 Socket statistics (IPv6) + snmp6 Snmp data (IPv6) + ========== ===================================================== + +.. table:: Table 1-9: Network info in /proc/net + + ============= ================================================================ + File Content + ============= ================================================================ + arp Kernel ARP table + dev network devices with statistics + dev_mcast the Layer2 multicast groups a device is listening too + (interface index, label, number of references, number of bound + addresses). + dev_stat network device status + ip_fwchains Firewall chain linkage + ip_fwnames Firewall chain names + ip_masq Directory containing the masquerading tables + ip_masquerade Major masquerading table + netstat Network statistics + raw raw device statistics + route Kernel routing table + rpc Directory containing rpc info + rt_cache Routing cache + snmp SNMP data + sockstat Socket statistics + tcp TCP sockets + udp UDP sockets + unix UNIX domain sockets + wireless Wireless interface data (Wavelan etc) + igmp IP multicast addresses, which this host joined + psched Global packet scheduler parameters. + netlink List of PF_NETLINK sockets + ip_mr_vifs List of multicast virtual interfaces + ip_mr_cache List of multicast routing cache + ============= ================================================================ + +You can use this information to see which network devices are available in +your system and how much traffic was routed over those devices:: + + > cat /proc/net/dev + Inter-|Receive |[... + face |bytes packets errs drop fifo frame compressed multicast|[... + lo: 908188 5596 0 0 0 0 0 0 [... + ppp0:15475140 20721 410 0 0 410 0 0 [... + eth0: 614530 7085 0 0 0 0 0 1 [... + + ...] Transmit + ...] bytes packets errs drop fifo colls carrier compressed + ...] 908188 5596 0 0 0 0 0 0 + ...] 1375103 17405 0 0 0 0 0 0 + ...] 1703981 5535 0 0 0 3 0 0 + +In addition, each Channel Bond interface has its own directory. For +example, the bond0 device will have a directory called /proc/net/bond0/. +It will contain information that is specific to that bond, such as the +current slaves of the bond, the link status of the slaves, and how +many times the slaves link has failed. + +1.5 SCSI info +------------- + +If you have a SCSI host adapter in your system, you'll find a subdirectory +named after the driver for this adapter in /proc/scsi. You'll also see a list +of all recognized SCSI devices in /proc/scsi:: + + >cat /proc/scsi/scsi + Attached devices: + Host: scsi0 Channel: 00 Id: 00 Lun: 00 + Vendor: IBM Model: DGHS09U Rev: 03E0 + Type: Direct-Access ANSI SCSI revision: 03 + Host: scsi0 Channel: 00 Id: 06 Lun: 00 + Vendor: PIONEER Model: CD-ROM DR-U06S Rev: 1.04 + Type: CD-ROM ANSI SCSI revision: 02 + + +The directory named after the driver has one file for each adapter found in +the system. These files contain information about the controller, including +the used IRQ and the IO address range. The amount of information shown is +dependent on the adapter you use. The example shows the output for an Adaptec +AHA-2940 SCSI adapter:: + + > cat /proc/scsi/aic7xxx/0 + + Adaptec AIC7xxx driver version: 5.1.19/3.2.4 + Compile Options: + TCQ Enabled By Default : Disabled + AIC7XXX_PROC_STATS : Disabled + AIC7XXX_RESET_DELAY : 5 + Adapter Configuration: + SCSI Adapter: Adaptec AHA-294X Ultra SCSI host adapter + Ultra Wide Controller + PCI MMAPed I/O Base: 0xeb001000 + Adapter SEEPROM Config: SEEPROM found and used. + Adaptec SCSI BIOS: Enabled + IRQ: 10 + SCBs: Active 0, Max Active 2, + Allocated 15, HW 16, Page 255 + Interrupts: 160328 + BIOS Control Word: 0x18b6 + Adapter Control Word: 0x005b + Extended Translation: Enabled + Disconnect Enable Flags: 0xffff + Ultra Enable Flags: 0x0001 + Tag Queue Enable Flags: 0x0000 + Ordered Queue Tag Flags: 0x0000 + Default Tag Queue Depth: 8 + Tagged Queue By Device array for aic7xxx host instance 0: + {255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255} + Actual queue depth per device for aic7xxx host instance 0: + {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1} + Statistics: + (scsi0:0:0:0) + Device using Wide/Sync transfers at 40.0 MByte/sec, offset 8 + Transinfo settings: current(12/8/1/0), goal(12/8/1/0), user(12/15/1/0) + Total transfers 160151 (74577 reads and 85574 writes) + (scsi0:0:6:0) + Device using Narrow/Sync transfers at 5.0 MByte/sec, offset 15 + Transinfo settings: current(50/15/0/0), goal(50/15/0/0), user(50/15/0/0) + Total transfers 0 (0 reads and 0 writes) + + +1.6 Parallel port info in /proc/parport +--------------------------------------- + +The directory /proc/parport contains information about the parallel ports of +your system. It has one subdirectory for each port, named after the port +number (0,1,2,...). + +These directories contain the four files shown in Table 1-10. + + +.. table:: Table 1-10: Files in /proc/parport + + ========= ==================================================================== + File Content + ========= ==================================================================== + autoprobe Any IEEE-1284 device ID information that has been acquired. + devices list of the device drivers using that port. A + will appear by the + name of the device currently using the port (it might not appear + against any). + hardware Parallel port's base address, IRQ line and DMA channel. + irq IRQ that parport is using for that port. This is in a separate + file to allow you to alter it by writing a new value in (IRQ + number or none). + ========= ==================================================================== + +1.7 TTY info in /proc/tty +------------------------- + +Information about the available and actually used tty's can be found in the +directory /proc/tty.You'll find entries for drivers and line disciplines in +this directory, as shown in Table 1-11. + + +.. table:: Table 1-11: Files in /proc/tty + + ============= ============================================== + File Content + ============= ============================================== + drivers list of drivers and their usage + ldiscs registered line disciplines + driver/serial usage statistic and status of single tty lines + ============= ============================================== + +To see which tty's are currently in use, you can simply look into the file +/proc/tty/drivers:: + + > cat /proc/tty/drivers + pty_slave /dev/pts 136 0-255 pty:slave + pty_master /dev/ptm 128 0-255 pty:master + pty_slave /dev/ttyp 3 0-255 pty:slave + pty_master /dev/pty 2 0-255 pty:master + serial /dev/cua 5 64-67 serial:callout + serial /dev/ttyS 4 64-67 serial + /dev/tty0 /dev/tty0 4 0 system:vtmaster + /dev/ptmx /dev/ptmx 5 2 system + /dev/console /dev/console 5 1 system:console + /dev/tty /dev/tty 5 0 system:/dev/tty + unknown /dev/tty 4 1-63 console + + +1.8 Miscellaneous kernel statistics in /proc/stat +------------------------------------------------- + +Various pieces of information about kernel activity are available in the +/proc/stat file. All of the numbers reported in this file are aggregates +since the system first booted. For a quick look, simply cat the file:: + + > cat /proc/stat + cpu 2255 34 2290 22625563 6290 127 456 0 0 0 + cpu0 1132 34 1441 11311718 3675 127 438 0 0 0 + cpu1 1123 0 849 11313845 2614 0 18 0 0 0 + intr 114930548 113199788 3 0 5 263 0 4 [... lots more numbers ...] + ctxt 1990473 + btime 1062191376 + processes 2915 + procs_running 1 + procs_blocked 0 + softirq 183433 0 21755 12 39 1137 231 21459 2263 + +The very first "cpu" line aggregates the numbers in all of the other "cpuN" +lines. These numbers identify the amount of time the CPU has spent performing +different kinds of work. Time units are in USER_HZ (typically hundredths of a +second). The meanings of the columns are as follows, from left to right: + +- user: normal processes executing in user mode +- nice: niced processes executing in user mode +- system: processes executing in kernel mode +- idle: twiddling thumbs +- iowait: In a word, iowait stands for waiting for I/O to complete. But there + are several problems: + + 1. Cpu will not wait for I/O to complete, iowait is the time that a task is + waiting for I/O to complete. When cpu goes into idle state for + outstanding task io, another task will be scheduled on this CPU. + 2. In a multi-core CPU, the task waiting for I/O to complete is not running + on any CPU, so the iowait of each CPU is difficult to calculate. + 3. The value of iowait field in /proc/stat will decrease in certain + conditions. + + So, the iowait is not reliable by reading from /proc/stat. +- irq: servicing interrupts +- softirq: servicing softirqs +- steal: involuntary wait +- guest: running a normal guest +- guest_nice: running a niced guest + +The "intr" line gives counts of interrupts serviced since boot time, for each +of the possible system interrupts. The first column is the total of all +interrupts serviced including unnumbered architecture specific interrupts; +each subsequent column is the total for that particular numbered interrupt. +Unnumbered interrupts are not shown, only summed into the total. + +The "ctxt" line gives the total number of context switches across all CPUs. + +The "btime" line gives the time at which the system booted, in seconds since +the Unix epoch. + +The "processes" line gives the number of processes and threads created, which +includes (but is not limited to) those created by calls to the fork() and +clone() system calls. + +The "procs_running" line gives the total number of threads that are +running or ready to run (i.e., the total number of runnable threads). + +The "procs_blocked" line gives the number of processes currently blocked, +waiting for I/O to complete. + +The "softirq" line gives counts of softirqs serviced since boot time, for each +of the possible system softirqs. The first column is the total of all +softirqs serviced; each subsequent column is the total for that particular +softirq. + + +1.9 Ext4 file system parameters +------------------------------- + +Information about mounted ext4 file systems can be found in +/proc/fs/ext4. Each mounted filesystem will have a directory in +/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or +/proc/fs/ext4/dm-0). The files in each per-device directory are shown +in Table 1-12, below. + +.. table:: Table 1-12: Files in /proc/fs/ext4/ + + ============== ========================================================== + File Content + mb_groups details of multiblock allocator buddy cache of free blocks + ============== ========================================================== + +2.0 /proc/consoles +------------------ +Shows registered system console lines. + +To see which character device lines are currently used for the system console +/dev/console, you may simply look into the file /proc/consoles:: + + > cat /proc/consoles + tty0 -WU (ECp) 4:7 + ttyS0 -W- (Ep) 4:64 + +The columns are: + ++--------------------+-------------------------------------------------------+ +| device | name of the device | ++====================+=======================================================+ +| operations | * R = can do read operations | +| | * W = can do write operations | +| | * U = can do unblank | ++--------------------+-------------------------------------------------------+ +| flags | * E = it is enabled | +| | * C = it is preferred console | +| | * B = it is primary boot console | +| | * p = it is used for printk buffer | +| | * b = it is not a TTY but a Braille device | +| | * a = it is safe to use when cpu is offline | ++--------------------+-------------------------------------------------------+ +| major:minor | major and minor number of the device separated by a | +| | colon | ++--------------------+-------------------------------------------------------+ + +Summary +------- + +The /proc file system serves information about the running system. It not only +allows access to process data but also allows you to request the kernel status +by reading files in the hierarchy. + +The directory structure of /proc reflects the types of information and makes +it easy, if not obvious, where to look for specific data. + +Chapter 2: Modifying System Parameters +====================================== + +In This Chapter +--------------- + +* Modifying kernel parameters by writing into files found in /proc/sys +* Exploring the files which modify certain parameters +* Review of the /proc/sys file tree + +------------------------------------------------------------------------------ + +A very interesting part of /proc is the directory /proc/sys. This is not only +a source of information, it also allows you to change parameters within the +kernel. Be very careful when attempting this. You can optimize your system, +but you can also cause it to crash. Never alter kernel parameters on a +production system. Set up a development machine and test to make sure that +everything works the way you want it to. You may have no alternative but to +reboot the machine once an error has been made. + +To change a value, simply echo the new value into the file. An example is +given below in the section on the file system data. You need to be root to do +this. You can create your own boot script to perform this every time your +system boots. + +The files in /proc/sys can be used to fine tune and monitor miscellaneous and +general things in the operation of the Linux kernel. Since some of the files +can inadvertently disrupt your system, it is advisable to read both +documentation and source before actually making adjustments. In any case, be +very careful when writing to any of these files. The entries in /proc may +change slightly between the 2.1.* and the 2.2 kernel, so if there is any doubt +review the kernel documentation in the directory /usr/src/linux/Documentation. +This chapter is heavily based on the documentation included in the pre 2.2 +kernels, and became part of it in version 2.2.1 of the Linux kernel. + +Please see: Documentation/admin-guide/sysctl/ directory for descriptions of these +entries. + +Summary +------- + +Certain aspects of kernel behavior can be modified at runtime, without the +need to recompile the kernel, or even to reboot the system. The files in the +/proc/sys tree can not only be read, but also modified. You can use the echo +command to write value into these files, thereby changing the default settings +of the kernel. + + +Chapter 3: Per-process Parameters +================================= + +3.1 /proc//oom_adj & /proc//oom_score_adj- Adjust the oom-killer score +-------------------------------------------------------------------------------- + +These file can be used to adjust the badness heuristic used to select which +process gets killed in out of memory conditions. + +The badness heuristic assigns a value to each candidate task ranging from 0 +(never kill) to 1000 (always kill) to determine which process is targeted. The +units are roughly a proportion along that range of allowed memory the process +may allocate from based on an estimation of its current memory and swap use. +For example, if a task is using all allowed memory, its badness score will be +1000. If it is using half of its allowed memory, its score will be 500. + +There is an additional factor included in the badness score: the current memory +and swap usage is discounted by 3% for root processes. + +The amount of "allowed" memory depends on the context in which the oom killer +was called. If it is due to the memory assigned to the allocating task's cpuset +being exhausted, the allowed memory represents the set of mems assigned to that +cpuset. If it is due to a mempolicy's node(s) being exhausted, the allowed +memory represents the set of mempolicy nodes. If it is due to a memory +limit (or swap limit) being reached, the allowed memory is that configured +limit. Finally, if it is due to the entire system being out of memory, the +allowed memory represents all allocatable resources. + +The value of /proc//oom_score_adj is added to the badness score before it +is used to determine which task to kill. Acceptable values range from -1000 +(OOM_SCORE_ADJ_MIN) to +1000 (OOM_SCORE_ADJ_MAX). This allows userspace to +polarize the preference for oom killing either by always preferring a certain +task or completely disabling it. The lowest possible value, -1000, is +equivalent to disabling oom killing entirely for that task since it will always +report a badness score of 0. + +Consequently, it is very simple for userspace to define the amount of memory to +consider for each task. Setting a /proc//oom_score_adj value of +500, for +example, is roughly equivalent to allowing the remainder of tasks sharing the +same system, cpuset, mempolicy, or memory controller resources to use at least +50% more memory. A value of -500, on the other hand, would be roughly +equivalent to discounting 50% of the task's allowed memory from being considered +as scoring against the task. + +For backwards compatibility with previous kernels, /proc//oom_adj may also +be used to tune the badness score. Its acceptable values range from -16 +(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17 +(OOM_DISABLE) to disable oom killing entirely for that task. Its value is +scaled linearly with /proc//oom_score_adj. + +The value of /proc//oom_score_adj may be reduced no lower than the last +value set by a CAP_SYS_RESOURCE process. To reduce the value any lower +requires CAP_SYS_RESOURCE. + +Caveat: when a parent task is selected, the oom killer will sacrifice any first +generation children with separate address spaces instead, if possible. This +avoids servers and important system daemons from being killed and loses the +minimal amount of work. + + +3.2 /proc//oom_score - Display current oom-killer score +------------------------------------------------------------- + +This file can be used to check the current score used by the oom-killer is for +any given . Use it together with /proc//oom_score_adj to tune which +process should be killed in an out-of-memory situation. + + +3.3 /proc//io - Display the IO accounting fields +------------------------------------------------------- + +This file contains IO statistics for each running process + +Example +~~~~~~~ + +:: + + test:/tmp # dd if=/dev/zero of=/tmp/test.dat & + [1] 3828 + + test:/tmp # cat /proc/3828/io + rchar: 323934931 + wchar: 323929600 + syscr: 632687 + syscw: 632675 + read_bytes: 0 + write_bytes: 323932160 + cancelled_write_bytes: 0 + + +Description +~~~~~~~~~~~ + +rchar +^^^^^ + +I/O counter: chars read +The number of bytes which this task has caused to be read from storage. This +is simply the sum of bytes which this process passed to read() and pread(). +It includes things like tty IO and it is unaffected by whether or not actual +physical disk IO was required (the read might have been satisfied from +pagecache) + + +wchar +^^^^^ + +I/O counter: chars written +The number of bytes which this task has caused, or shall cause to be written +to disk. Similar caveats apply here as with rchar. + + +syscr +^^^^^ + +I/O counter: read syscalls +Attempt to count the number of read I/O operations, i.e. syscalls like read() +and pread(). + + +syscw +^^^^^ + +I/O counter: write syscalls +Attempt to count the number of write I/O operations, i.e. syscalls like +write() and pwrite(). + + +read_bytes +^^^^^^^^^^ + +I/O counter: bytes read +Attempt to count the number of bytes which this process really did cause to +be fetched from the storage layer. Done at the submit_bio() level, so it is +accurate for block-backed filesystems. + + +write_bytes +^^^^^^^^^^^ + +I/O counter: bytes written +Attempt to count the number of bytes which this process caused to be sent to +the storage layer. This is done at page-dirtying time. + + +cancelled_write_bytes +^^^^^^^^^^^^^^^^^^^^^ + +The big inaccuracy here is truncate. If a process writes 1MB to a file and +then deletes the file, it will in fact perform no writeout. But it will have +been accounted as having caused 1MB of write. +In other words: The number of bytes which this process caused to not happen, +by truncating pagecache. A task can cause "negative" IO too. If this task +truncates some dirty pagecache, some IO which another task has been accounted +for (in its write_bytes) will not be happening. We _could_ just subtract that +from the truncating task's write_bytes, but there is information loss in doing +that. + + +.. Note:: + + At its current implementation state, this is a bit racy on 32-bit machines: + if process A reads process B's /proc/pid/io while process B is updating one + of those 64-bit counters, process A could see an intermediate result. + + +More information about this can be found within the taskstats documentation in +Documentation/accounting. + +3.4 /proc//coredump_filter - Core dump filtering settings +--------------------------------------------------------------- +When a process is dumped, all anonymous memory is written to a core file as +long as the size of the core file isn't limited. But sometimes we don't want +to dump some memory segments, for example, huge shared memory or DAX. +Conversely, sometimes we want to save file-backed memory segments into a core +file, not only the individual files. + +/proc//coredump_filter allows you to customize which memory segments +will be dumped when the process is dumped. coredump_filter is a bitmask +of memory types. If a bit of the bitmask is set, memory segments of the +corresponding memory type are dumped, otherwise they are not dumped. + +The following 9 memory types are supported: + + - (bit 0) anonymous private memory + - (bit 1) anonymous shared memory + - (bit 2) file-backed private memory + - (bit 3) file-backed shared memory + - (bit 4) ELF header pages in file-backed private memory areas (it is + effective only if the bit 2 is cleared) + - (bit 5) hugetlb private memory + - (bit 6) hugetlb shared memory + - (bit 7) DAX private memory + - (bit 8) DAX shared memory + + Note that MMIO pages such as frame buffer are never dumped and vDSO pages + are always dumped regardless of the bitmask status. + + Note that bits 0-4 don't affect hugetlb or DAX memory. hugetlb memory is + only affected by bit 5-6, and DAX is only affected by bits 7-8. + +The default value of coredump_filter is 0x33; this means all anonymous memory +segments, ELF header pages and hugetlb private memory are dumped. + +If you don't want to dump all shared memory segments attached to pid 1234, +write 0x31 to the process's proc file:: + + $ echo 0x31 > /proc/1234/coredump_filter + +When a new process is created, the process inherits the bitmask status from its +parent. It is useful to set up coredump_filter before the program runs. +For example:: + + $ echo 0x7 > /proc/self/coredump_filter + $ ./some_program + +3.5 /proc//mountinfo - Information about mounts +-------------------------------------------------------- + +This file contains lines of the form:: + + 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue + (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11) + + (1) mount ID: unique identifier of the mount (may be reused after umount) + (2) parent ID: ID of parent (or of self for the top of the mount tree) + (3) major:minor: value of st_dev for files on filesystem + (4) root: root of the mount within the filesystem + (5) mount point: mount point relative to the process's root + (6) mount options: per mount options + (7) optional fields: zero or more fields of the form "tag[:value]" + (8) separator: marks the end of the optional fields + (9) filesystem type: name of filesystem of the form "type[.subtype]" + (10) mount source: filesystem specific information or "none" + (11) super options: per super block options + +Parsers should ignore all unrecognised optional fields. Currently the +possible optional fields are: + +================ ============================================================== +shared:X mount is shared in peer group X +master:X mount is slave to peer group X +propagate_from:X mount is slave and receives propagation from peer group X [#]_ +unbindable mount is unbindable +================ ============================================================== + +.. [#] X is the closest dominant peer group under the process's root. If + X is the immediate master of the mount, or if there's no dominant peer + group under the same root, then only the "master:X" field is present + and not the "propagate_from:X" field. + +For more information on mount propagation see: + + Documentation/filesystems/sharedsubtree.txt + + +3.6 /proc//comm & /proc//task//comm +-------------------------------------------------------- +These files provide a method to access a tasks comm value. It also allows for +a task to set its own or one of its thread siblings comm value. The comm value +is limited in size compared to the cmdline value, so writing anything longer +then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated +comm value. + + +3.7 /proc//task//children - Information about task children +------------------------------------------------------------------------- +This file provides a fast way to retrieve first level children pids +of a task pointed by / pair. The format is a space separated +stream of pids. + +Note the "first level" here -- if a child has own children they will +not be listed here, one needs to read /proc//task//children +to obtain the descendants. + +Since this interface is intended to be fast and cheap it doesn't +guarantee to provide precise results and some children might be +skipped, especially if they've exited right after we printed their +pids, so one need to either stop or freeze processes being inspected +if precise results are needed. + + +3.8 /proc//fdinfo/ - Information about opened file +--------------------------------------------------------------- +This file provides information associated with an opened file. The regular +files have at least three fields -- 'pos', 'flags' and mnt_id. The 'pos' +represents the current offset of the opened file in decimal form [see lseek(2) +for details], 'flags' denotes the octal O_xxx mask the file has been +created with [see open(2) for details] and 'mnt_id' represents mount ID of +the file system containing the opened file [see 3.5 /proc//mountinfo +for details]. + +A typical output is:: + + pos: 0 + flags: 0100002 + mnt_id: 19 + +All locks associated with a file descriptor are shown in its fdinfo too:: + + lock: 1: FLOCK ADVISORY WRITE 359 00:13:11691 0 EOF + +The files such as eventfd, fsnotify, signalfd, epoll among the regular pos/flags +pair provide additional information particular to the objects they represent. + +Eventfd files +~~~~~~~~~~~~~ + +:: + + pos: 0 + flags: 04002 + mnt_id: 9 + eventfd-count: 5a + +where 'eventfd-count' is hex value of a counter. + +Signalfd files +~~~~~~~~~~~~~~ + +:: + + pos: 0 + flags: 04002 + mnt_id: 9 + sigmask: 0000000000000200 + +where 'sigmask' is hex value of the signal mask associated +with a file. + +Epoll files +~~~~~~~~~~~ + +:: + + pos: 0 + flags: 02 + mnt_id: 9 + tfd: 5 events: 1d data: ffffffffffffffff pos:0 ino:61af sdev:7 + +where 'tfd' is a target file descriptor number in decimal form, +'events' is events mask being watched and the 'data' is data +associated with a target [see epoll(7) for more details]. + +The 'pos' is current offset of the target file in decimal form +[see lseek(2)], 'ino' and 'sdev' are inode and device numbers +where target file resides, all in hex format. + +Fsnotify files +~~~~~~~~~~~~~~ +For inotify files the format is the following:: + + pos: 0 + flags: 02000000 + inotify wd:3 ino:9e7e sdev:800013 mask:800afce ignored_mask:0 fhandle-bytes:8 fhandle-type:1 f_handle:7e9e0000640d1b6d + +where 'wd' is a watch descriptor in decimal form, ie a target file +descriptor number, 'ino' and 'sdev' are inode and device where the +target file resides and the 'mask' is the mask of events, all in hex +form [see inotify(7) for more details]. + +If the kernel was built with exportfs support, the path to the target +file is encoded as a file handle. The file handle is provided by three +fields 'fhandle-bytes', 'fhandle-type' and 'f_handle', all in hex +format. + +If the kernel is built without exportfs support the file handle won't be +printed out. + +If there is no inotify mark attached yet the 'inotify' line will be omitted. + +For fanotify files the format is:: + + pos: 0 + flags: 02 + mnt_id: 9 + fanotify flags:10 event-flags:0 + fanotify mnt_id:12 mflags:40 mask:38 ignored_mask:40000003 + fanotify ino:4f969 sdev:800013 mflags:0 mask:3b ignored_mask:40000000 fhandle-bytes:8 fhandle-type:1 f_handle:69f90400c275b5b4 + +where fanotify 'flags' and 'event-flags' are values used in fanotify_init +call, 'mnt_id' is the mount point identifier, 'mflags' is the value of +flags associated with mark which are tracked separately from events +mask. 'ino', 'sdev' are target inode and device, 'mask' is the events +mask and 'ignored_mask' is the mask of events which are to be ignored. +All in hex format. Incorporation of 'mflags', 'mask' and 'ignored_mask' +does provide information about flags and mask used in fanotify_mark +call [see fsnotify manpage for details]. + +While the first three lines are mandatory and always printed, the rest is +optional and may be omitted if no marks created yet. + +Timerfd files +~~~~~~~~~~~~~ + +:: + + pos: 0 + flags: 02 + mnt_id: 9 + clockid: 0 + ticks: 0 + settime flags: 01 + it_value: (0, 49406829) + it_interval: (1, 0) + +where 'clockid' is the clock type and 'ticks' is the number of the timer expirations +that have occurred [see timerfd_create(2) for details]. 'settime flags' are +flags in octal form been used to setup the timer [see timerfd_settime(2) for +details]. 'it_value' is remaining time until the timer exiration. +'it_interval' is the interval for the timer. Note the timer might be set up +with TIMER_ABSTIME option which will be shown in 'settime flags', but 'it_value' +still exhibits timer's remaining time. + +3.9 /proc//map_files - Information about memory mapped files +--------------------------------------------------------------------- +This directory contains symbolic links which represent memory mapped files +the process is maintaining. Example output:: + + | lr-------- 1 root root 64 Jan 27 11:24 333c600000-333c620000 -> /usr/lib64/ld-2.18.so + | lr-------- 1 root root 64 Jan 27 11:24 333c81f000-333c820000 -> /usr/lib64/ld-2.18.so + | lr-------- 1 root root 64 Jan 27 11:24 333c820000-333c821000 -> /usr/lib64/ld-2.18.so + | ... + | lr-------- 1 root root 64 Jan 27 11:24 35d0421000-35d0422000 -> /usr/lib64/libselinux.so.1 + | lr-------- 1 root root 64 Jan 27 11:24 400000-41a000 -> /usr/bin/ls + +The name of a link represents the virtual memory bounds of a mapping, i.e. +vm_area_struct::vm_start-vm_area_struct::vm_end. + +The main purpose of the map_files is to retrieve a set of memory mapped +files in a fast way instead of parsing /proc//maps or +/proc//smaps, both of which contain many more records. At the same +time one can open(2) mappings from the listings of two processes and +comparing their inode numbers to figure out which anonymous memory areas +are actually shared. + +3.10 /proc//timerslack_ns - Task timerslack value +--------------------------------------------------------- +This file provides the value of the task's timerslack value in nanoseconds. +This value specifies a amount of time that normal timers may be deferred +in order to coalesce timers and avoid unnecessary wakeups. + +This allows a task's interactivity vs power consumption trade off to be +adjusted. + +Writing 0 to the file will set the tasks timerslack to the default value. + +Valid values are from 0 - ULLONG_MAX + +An application setting the value must have PTRACE_MODE_ATTACH_FSCREDS level +permissions on the task specified to change its timerslack_ns value. + +3.11 /proc//patch_state - Livepatch patch operation state +----------------------------------------------------------------- +When CONFIG_LIVEPATCH is enabled, this file displays the value of the +patch state for the task. + +A value of '-1' indicates that no patch is in transition. + +A value of '0' indicates that a patch is in transition and the task is +unpatched. If the patch is being enabled, then the task hasn't been +patched yet. If the patch is being disabled, then the task has already +been unpatched. + +A value of '1' indicates that a patch is in transition and the task is +patched. If the patch is being enabled, then the task has already been +patched. If the patch is being disabled, then the task hasn't been +unpatched yet. + +3.12 /proc//arch_status - task architecture specific status +------------------------------------------------------------------- +When CONFIG_PROC_PID_ARCH_STATUS is enabled, this file displays the +architecture specific status of the task. + +Example +~~~~~~~ + +:: + + $ cat /proc/6753/arch_status + AVX512_elapsed_ms: 8 + +Description +~~~~~~~~~~~ + +x86 specific entries: +~~~~~~~~~~~~~~~~~~~~~ + +AVX512_elapsed_ms: +^^^^^^^^^^^^^^^^^^ + + If AVX512 is supported on the machine, this entry shows the milliseconds + elapsed since the last time AVX512 usage was recorded. The recording + happens on a best effort basis when a task is scheduled out. This means + that the value depends on two factors: + + 1) The time which the task spent on the CPU without being scheduled + out. With CPU isolation and a single runnable task this can take + several seconds. + + 2) The time since the task was scheduled out last. Depending on the + reason for being scheduled out (time slice exhausted, syscall ...) + this can be arbitrary long time. + + As a consequence the value cannot be considered precise and authoritative + information. The application which uses this information has to be aware + of the overall scenario on the system in order to determine whether a + task is a real AVX512 user or not. Precise information can be obtained + with performance counters. + + A special value of '-1' indicates that no AVX512 usage was recorded, thus + the task is unlikely an AVX512 user, but depends on the workload and the + scheduling scenario, it also could be a false negative mentioned above. + +Configuring procfs +------------------ + +4.1 Mount options +--------------------- + +The following mount options are supported: + + ========= ======================================================== + hidepid= Set /proc// access mode. + gid= Set the group authorized to learn processes information. + ========= ======================================================== + +hidepid=0 means classic mode - everybody may access all /proc// directories +(default). + +hidepid=1 means users may not access any /proc// directories but their +own. Sensitive files like cmdline, sched*, status are now protected against +other users. This makes it impossible to learn whether any user runs +specific program (given the program doesn't reveal itself by its behaviour). +As an additional bonus, as /proc//cmdline is unaccessible for other users, +poorly written programs passing sensitive information via program arguments are +now protected against local eavesdroppers. + +hidepid=2 means hidepid=1 plus all /proc// will be fully invisible to other +users. It doesn't mean that it hides a fact whether a process with a specific +pid value exists (it can be learned by other means, e.g. by "kill -0 $PID"), +but it hides process' uid and gid, which may be learned by stat()'ing +/proc// otherwise. It greatly complicates an intruder's task of gathering +information about running processes, whether some daemon runs with elevated +privileges, whether other user runs some sensitive program, whether other users +run any program at all, etc. + +gid= defines a group authorized to learn processes information otherwise +prohibited by hidepid=. If you use some daemon like identd which needs to learn +information about processes information, just add identd to this group. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt deleted file mode 100644 index 99ca040e3f90..000000000000 --- a/Documentation/filesystems/proc.txt +++ /dev/null @@ -1,2047 +0,0 @@ ------------------------------------------------------------------------------- - T H E /proc F I L E S Y S T E M ------------------------------------------------------------------------------- -/proc/sys Terrehon Bowden October 7 1999 - Bodo Bauer - -2.4.x update Jorge Nerin November 14 2000 -move /proc/sys Shen Feng April 1 2009 ------------------------------------------------------------------------------- -Version 1.3 Kernel version 2.2.12 - Kernel version 2.4.0-test11-pre4 ------------------------------------------------------------------------------- -fixes/update part 1.1 Stefani Seibold June 9 2009 - -Table of Contents ------------------ - - 0 Preface - 0.1 Introduction/Credits - 0.2 Legal Stuff - - 1 Collecting System Information - 1.1 Process-Specific Subdirectories - 1.2 Kernel data - 1.3 IDE devices in /proc/ide - 1.4 Networking info in /proc/net - 1.5 SCSI info - 1.6 Parallel port info in /proc/parport - 1.7 TTY info in /proc/tty - 1.8 Miscellaneous kernel statistics in /proc/stat - 1.9 Ext4 file system parameters - - 2 Modifying System Parameters - - 3 Per-Process Parameters - 3.1 /proc//oom_adj & /proc//oom_score_adj - Adjust the oom-killer - score - 3.2 /proc//oom_score - Display current oom-killer score - 3.3 /proc//io - Display the IO accounting fields - 3.4 /proc//coredump_filter - Core dump filtering settings - 3.5 /proc//mountinfo - Information about mounts - 3.6 /proc//comm & /proc//task//comm - 3.7 /proc//task//children - Information about task children - 3.8 /proc//fdinfo/ - Information about opened file - 3.9 /proc//map_files - Information about memory mapped files - 3.10 /proc//timerslack_ns - Task timerslack value - 3.11 /proc//patch_state - Livepatch patch operation state - 3.12 /proc//arch_status - Task architecture specific information - - 4 Configuring procfs - 4.1 Mount options - ------------------------------------------------------------------------------- -Preface ------------------------------------------------------------------------------- - -0.1 Introduction/Credits ------------------------- - -This documentation is part of a soon (or so we hope) to be released book on -the SuSE Linux distribution. As there is no complete documentation for the -/proc file system and we've used many freely available sources to write these -chapters, it seems only fair to give the work back to the Linux community. -This work is based on the 2.2.* kernel version and the upcoming 2.4.*. I'm -afraid it's still far from complete, but we hope it will be useful. As far as -we know, it is the first 'all-in-one' document about the /proc file system. It -is focused on the Intel x86 hardware, so if you are looking for PPC, ARM, -SPARC, AXP, etc., features, you probably won't find what you are looking for. -It also only covers IPv4 networking, not IPv6 nor other protocols - sorry. But -additions and patches are welcome and will be added to this document if you -mail them to Bodo. - -We'd like to thank Alan Cox, Rik van Riel, and Alexey Kuznetsov and a lot of -other people for help compiling this documentation. We'd also like to extend a -special thank you to Andi Kleen for documentation, which we relied on heavily -to create this document, as well as the additional information he provided. -Thanks to everybody else who contributed source or docs to the Linux kernel -and helped create a great piece of software... :) - -If you have any comments, corrections or additions, please don't hesitate to -contact Bodo Bauer at bb@ricochet.net. We'll be happy to add them to this -document. - -The latest version of this document is available online at -http://tldp.org/LDP/Linux-Filesystem-Hierarchy/html/proc.html - -If the above direction does not works for you, you could try the kernel -mailing list at linux-kernel@vger.kernel.org and/or try to reach me at -comandante@zaralinux.com. - -0.2 Legal Stuff ---------------- - -We don't guarantee the correctness of this document, and if you come to us -complaining about how you screwed up your system because of incorrect -documentation, we won't feel responsible... - ------------------------------------------------------------------------------- -CHAPTER 1: COLLECTING SYSTEM INFORMATION ------------------------------------------------------------------------------- - ------------------------------------------------------------------------------- -In This Chapter ------------------------------------------------------------------------------- -* Investigating the properties of the pseudo file system /proc and its - ability to provide information on the running Linux system -* Examining /proc's structure -* Uncovering various information about the kernel and the processes running - on the system ------------------------------------------------------------------------------- - - -The proc file system acts as an interface to internal data structures in the -kernel. It can be used to obtain information about the system and to change -certain kernel parameters at runtime (sysctl). - -First, we'll take a look at the read-only parts of /proc. In Chapter 2, we -show you how you can use /proc/sys to change settings. - -1.1 Process-Specific Subdirectories ------------------------------------ - -The directory /proc contains (among other things) one subdirectory for each -process running on the system, which is named after the process ID (PID). - -The link self points to the process reading the file system. Each process -subdirectory has the entries listed in Table 1-1. - -Note that an open a file descriptor to /proc/ or to any of its -contained files or subdirectories does not prevent being reused -for some other process in the event that exits. Operations on -open /proc/ file descriptors corresponding to dead processes -never act on any new process that the kernel may, through chance, have -also assigned the process ID . Instead, operations on these FDs -usually fail with ESRCH. - -Table 1-1: Process specific entries in /proc -.............................................................................. - File Content - clear_refs Clears page referenced bits shown in smaps output - cmdline Command line arguments - cpu Current and last cpu in which it was executed (2.4)(smp) - cwd Link to the current working directory - environ Values of environment variables - exe Link to the executable of this process - fd Directory, which contains all file descriptors - maps Memory maps to executables and library files (2.4) - mem Memory held by this process - root Link to the root directory of this process - stat Process status - statm Process memory status information - status Process status in human readable form - wchan Present with CONFIG_KALLSYMS=y: it shows the kernel function - symbol the task is blocked in - or "0" if not blocked. - pagemap Page table - stack Report full stack trace, enable via CONFIG_STACKTRACE - smaps An extension based on maps, showing the memory consumption of - each mapping and flags associated with it - smaps_rollup Accumulated smaps stats for all mappings of the process. This - can be derived from smaps, but is faster and more convenient - numa_maps An extension based on maps, showing the memory locality and - binding policy as well as mem usage (in pages) of each mapping. -.............................................................................. - -For example, to get the status information of a process, all you have to do is -read the file /proc/PID/status: - - >cat /proc/self/status - Name: cat - State: R (running) - Tgid: 5452 - Pid: 5452 - PPid: 743 - TracerPid: 0 (2.4) - Uid: 501 501 501 501 - Gid: 100 100 100 100 - FDSize: 256 - Groups: 100 14 16 - VmPeak: 5004 kB - VmSize: 5004 kB - VmLck: 0 kB - VmHWM: 476 kB - VmRSS: 476 kB - RssAnon: 352 kB - RssFile: 120 kB - RssShmem: 4 kB - VmData: 156 kB - VmStk: 88 kB - VmExe: 68 kB - VmLib: 1412 kB - VmPTE: 20 kb - VmSwap: 0 kB - HugetlbPages: 0 kB - CoreDumping: 0 - THP_enabled: 1 - Threads: 1 - SigQ: 0/28578 - SigPnd: 0000000000000000 - ShdPnd: 0000000000000000 - SigBlk: 0000000000000000 - SigIgn: 0000000000000000 - SigCgt: 0000000000000000 - CapInh: 00000000fffffeff - CapPrm: 0000000000000000 - CapEff: 0000000000000000 - CapBnd: ffffffffffffffff - CapAmb: 0000000000000000 - NoNewPrivs: 0 - Seccomp: 0 - Speculation_Store_Bypass: thread vulnerable - voluntary_ctxt_switches: 0 - nonvoluntary_ctxt_switches: 1 - -This shows you nearly the same information you would get if you viewed it with -the ps command. In fact, ps uses the proc file system to obtain its -information. But you get a more detailed view of the process by reading the -file /proc/PID/status. It fields are described in table 1-2. - -The statm file contains more detailed information about the process -memory usage. Its seven fields are explained in Table 1-3. The stat file -contains details information about the process itself. Its fields are -explained in Table 1-4. - -(for SMP CONFIG users) -For making accounting scalable, RSS related information are handled in an -asynchronous manner and the value may not be very precise. To see a precise -snapshot of a moment, you can see /proc//smaps file and scan page table. -It's slow but very precise. - -Table 1-2: Contents of the status files (as of 4.19) -.............................................................................. - Field Content - Name filename of the executable - Umask file mode creation mask - State state (R is running, S is sleeping, D is sleeping - in an uninterruptible wait, Z is zombie, - T is traced or stopped) - Tgid thread group ID - Ngid NUMA group ID (0 if none) - Pid process id - PPid process id of the parent process - TracerPid PID of process tracing this process (0 if not) - Uid Real, effective, saved set, and file system UIDs - Gid Real, effective, saved set, and file system GIDs - FDSize number of file descriptor slots currently allocated - Groups supplementary group list - NStgid descendant namespace thread group ID hierarchy - NSpid descendant namespace process ID hierarchy - NSpgid descendant namespace process group ID hierarchy - NSsid descendant namespace session ID hierarchy - VmPeak peak virtual memory size - VmSize total program size - VmLck locked memory size - VmPin pinned memory size - VmHWM peak resident set size ("high water mark") - VmRSS size of memory portions. It contains the three - following parts (VmRSS = RssAnon + RssFile + RssShmem) - RssAnon size of resident anonymous memory - RssFile size of resident file mappings - RssShmem size of resident shmem memory (includes SysV shm, - mapping of tmpfs and shared anonymous mappings) - VmData size of private data segments - VmStk size of stack segments - VmExe size of text segment - VmLib size of shared library code - VmPTE size of page table entries - VmSwap amount of swap used by anonymous private data - (shmem swap usage is not included) - HugetlbPages size of hugetlb memory portions - CoreDumping process's memory is currently being dumped - (killing the process may lead to a corrupted core) - THP_enabled process is allowed to use THP (returns 0 when - PR_SET_THP_DISABLE is set on the process - Threads number of threads - SigQ number of signals queued/max. number for queue - SigPnd bitmap of pending signals for the thread - ShdPnd bitmap of shared pending signals for the process - SigBlk bitmap of blocked signals - SigIgn bitmap of ignored signals - SigCgt bitmap of caught signals - CapInh bitmap of inheritable capabilities - CapPrm bitmap of permitted capabilities - CapEff bitmap of effective capabilities - CapBnd bitmap of capabilities bounding set - CapAmb bitmap of ambient capabilities - NoNewPrivs no_new_privs, like prctl(PR_GET_NO_NEW_PRIV, ...) - Seccomp seccomp mode, like prctl(PR_GET_SECCOMP, ...) - Speculation_Store_Bypass speculative store bypass mitigation status - Cpus_allowed mask of CPUs on which this process may run - Cpus_allowed_list Same as previous, but in "list format" - Mems_allowed mask of memory nodes allowed to this process - Mems_allowed_list Same as previous, but in "list format" - voluntary_ctxt_switches number of voluntary context switches - nonvoluntary_ctxt_switches number of non voluntary context switches -.............................................................................. - -Table 1-3: Contents of the statm files (as of 2.6.8-rc3) -.............................................................................. - Field Content - size total program size (pages) (same as VmSize in status) - resident size of memory portions (pages) (same as VmRSS in status) - shared number of pages that are shared (i.e. backed by a file, same - as RssFile+RssShmem in status) - trs number of pages that are 'code' (not including libs; broken, - includes data segment) - lrs number of pages of library (always 0 on 2.6) - drs number of pages of data/stack (including libs; broken, - includes library text) - dt number of dirty pages (always 0 on 2.6) -.............................................................................. - - -Table 1-4: Contents of the stat files (as of 2.6.30-rc7) -.............................................................................. - Field Content - pid process id - tcomm filename of the executable - state state (R is running, S is sleeping, D is sleeping in an - uninterruptible wait, Z is zombie, T is traced or stopped) - ppid process id of the parent process - pgrp pgrp of the process - sid session id - tty_nr tty the process uses - tty_pgrp pgrp of the tty - flags task flags - min_flt number of minor faults - cmin_flt number of minor faults with child's - maj_flt number of major faults - cmaj_flt number of major faults with child's - utime user mode jiffies - stime kernel mode jiffies - cutime user mode jiffies with child's - cstime kernel mode jiffies with child's - priority priority level - nice nice level - num_threads number of threads - it_real_value (obsolete, always 0) - start_time time the process started after system boot - vsize virtual memory size - rss resident set memory size - rsslim current limit in bytes on the rss - start_code address above which program text can run - end_code address below which program text can run - start_stack address of the start of the main process stack - esp current value of ESP - eip current value of EIP - pending bitmap of pending signals - blocked bitmap of blocked signals - sigign bitmap of ignored signals - sigcatch bitmap of caught signals - 0 (place holder, used to be the wchan address, use /proc/PID/wchan instead) - 0 (place holder) - 0 (place holder) - exit_signal signal to send to parent thread on exit - task_cpu which CPU the task is scheduled on - rt_priority realtime priority - policy scheduling policy (man sched_setscheduler) - blkio_ticks time spent waiting for block IO - gtime guest time of the task in jiffies - cgtime guest time of the task children in jiffies - start_data address above which program data+bss is placed - end_data address below which program data+bss is placed - start_brk address above which program heap can be expanded with brk() - arg_start address above which program command line is placed - arg_end address below which program command line is placed - env_start address above which program environment is placed - env_end address below which program environment is placed - exit_code the thread's exit_code in the form reported by the waitpid system call -.............................................................................. - -The /proc/PID/maps file contains the currently mapped memory regions and -their access permissions. - -The format is: - -address perms offset dev inode pathname - -08048000-08049000 r-xp 00000000 03:00 8312 /opt/test -08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test -0804a000-0806b000 rw-p 00000000 00:00 0 [heap] -a7cb1000-a7cb2000 ---p 00000000 00:00 0 -a7cb2000-a7eb2000 rw-p 00000000 00:00 0 -a7eb2000-a7eb3000 ---p 00000000 00:00 0 -a7eb3000-a7ed5000 rw-p 00000000 00:00 0 -a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 -a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6 -a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6 -a800b000-a800e000 rw-p 00000000 00:00 0 -a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0 -a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0 -a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0 -a8024000-a8027000 rw-p 00000000 00:00 0 -a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2 -a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2 -a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2 -aff35000-aff4a000 rw-p 00000000 00:00 0 [stack] -ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso] - -where "address" is the address space in the process that it occupies, "perms" -is a set of permissions: - - r = read - w = write - x = execute - s = shared - p = private (copy on write) - -"offset" is the offset into the mapping, "dev" is the device (major:minor), and -"inode" is the inode on that device. 0 indicates that no inode is associated -with the memory region, as the case would be with BSS (uninitialized data). -The "pathname" shows the name associated file for this mapping. If the mapping -is not associated with a file: - - [heap] = the heap of the program - [stack] = the stack of the main process - [vdso] = the "virtual dynamic shared object", - the kernel system call handler - - or if empty, the mapping is anonymous. - -The /proc/PID/smaps is an extension based on maps, showing the memory -consumption for each of the process's mappings. For each mapping (aka Virtual -Memory Area, or VMA) there is a series of lines such as the following: - -08048000-080bc000 r-xp 00000000 03:02 13130 /bin/bash - -Size: 1084 kB -KernelPageSize: 4 kB -MMUPageSize: 4 kB -Rss: 892 kB -Pss: 374 kB -Shared_Clean: 892 kB -Shared_Dirty: 0 kB -Private_Clean: 0 kB -Private_Dirty: 0 kB -Referenced: 892 kB -Anonymous: 0 kB -LazyFree: 0 kB -AnonHugePages: 0 kB -ShmemPmdMapped: 0 kB -Shared_Hugetlb: 0 kB -Private_Hugetlb: 0 kB -Swap: 0 kB -SwapPss: 0 kB -KernelPageSize: 4 kB -MMUPageSize: 4 kB -Locked: 0 kB -THPeligible: 0 -VmFlags: rd ex mr mw me dw - -The first of these lines shows the same information as is displayed for the -mapping in /proc/PID/maps. Following lines show the size of the mapping -(size); the size of each page allocated when backing a VMA (KernelPageSize), -which is usually the same as the size in the page table entries; the page size -used by the MMU when backing a VMA (in most cases, the same as KernelPageSize); -the amount of the mapping that is currently resident in RAM (RSS); the -process' proportional share of this mapping (PSS); and the number of clean and -dirty shared and private pages in the mapping. - -The "proportional set size" (PSS) of a process is the count of pages it has -in memory, where each page is divided by the number of processes sharing it. -So if a process has 1000 pages all to itself, and 1000 shared with one other -process, its PSS will be 1500. -Note that even a page which is part of a MAP_SHARED mapping, but has only -a single pte mapped, i.e. is currently used by only one process, is accounted -as private and not as shared. -"Referenced" indicates the amount of memory currently marked as referenced or -accessed. -"Anonymous" shows the amount of memory that does not belong to any file. Even -a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE -and a page is modified, the file page is replaced by a private anonymous copy. -"LazyFree" shows the amount of memory which is marked by madvise(MADV_FREE). -The memory isn't freed immediately with madvise(). It's freed in memory -pressure if the memory is clean. Please note that the printed value might -be lower than the real value due to optimizations used in the current -implementation. If this is not desirable please file a bug report. -"AnonHugePages" shows the ammount of memory backed by transparent hugepage. -"ShmemPmdMapped" shows the ammount of shared (shmem/tmpfs) memory backed by -huge pages. -"Shared_Hugetlb" and "Private_Hugetlb" show the ammounts of memory backed by -hugetlbfs page which is *not* counted in "RSS" or "PSS" field for historical -reasons. And these are not included in {Shared,Private}_{Clean,Dirty} field. -"Swap" shows how much would-be-anonymous memory is also used, but out on swap. -For shmem mappings, "Swap" includes also the size of the mapped (and not -replaced by copy-on-write) part of the underlying shmem object out on swap. -"SwapPss" shows proportional swap share of this mapping. Unlike "Swap", this -does not take into account swapped out page of underlying shmem objects. -"Locked" indicates whether the mapping is locked in memory or not. -"THPeligible" indicates whether the mapping is eligible for allocating THP -pages - 1 if true, 0 otherwise. It just shows the current status. - -"VmFlags" field deserves a separate description. This member represents the kernel -flags associated with the particular virtual memory area in two letter encoded -manner. The codes are the following: - rd - readable - wr - writeable - ex - executable - sh - shared - mr - may read - mw - may write - me - may execute - ms - may share - gd - stack segment growns down - pf - pure PFN range - dw - disabled write to the mapped file - lo - pages are locked in memory - io - memory mapped I/O area - sr - sequential read advise provided - rr - random read advise provided - dc - do not copy area on fork - de - do not expand area on remapping - ac - area is accountable - nr - swap space is not reserved for the area - ht - area uses huge tlb pages - ar - architecture specific flag - dd - do not include area into core dump - sd - soft-dirty flag - mm - mixed map area - hg - huge page advise flag - nh - no-huge page advise flag - mg - mergable advise flag - -Note that there is no guarantee that every flag and associated mnemonic will -be present in all further kernel releases. Things get changed, the flags may -be vanished or the reverse -- new added. Interpretation of their meaning -might change in future as well. So each consumer of these flags has to -follow each specific kernel version for the exact semantic. - -This file is only present if the CONFIG_MMU kernel configuration option is -enabled. - -Note: reading /proc/PID/maps or /proc/PID/smaps is inherently racy (consistent -output can be achieved only in the single read call). -This typically manifests when doing partial reads of these files while the -memory map is being modified. Despite the races, we do provide the following -guarantees: - -1) The mapped addresses never go backwards, which implies no two - regions will ever overlap. -2) If there is something at a given vaddr during the entirety of the - life of the smaps/maps walk, there will be some output for it. - -The /proc/PID/smaps_rollup file includes the same fields as /proc/PID/smaps, -but their values are the sums of the corresponding values for all mappings of -the process. Additionally, it contains these fields: - -Pss_Anon -Pss_File -Pss_Shmem - -They represent the proportional shares of anonymous, file, and shmem pages, as -described for smaps above. These fields are omitted in smaps since each -mapping identifies the type (anon, file, or shmem) of all pages it contains. -Thus all information in smaps_rollup can be derived from smaps, but at a -significantly higher cost. - -The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG -bits on both physical and virtual pages associated with a process, and the -soft-dirty bit on pte (see Documentation/admin-guide/mm/soft-dirty.rst -for details). -To clear the bits for all the pages associated with the process - > echo 1 > /proc/PID/clear_refs - -To clear the bits for the anonymous pages associated with the process - > echo 2 > /proc/PID/clear_refs - -To clear the bits for the file mapped pages associated with the process - > echo 3 > /proc/PID/clear_refs - -To clear the soft-dirty bit - > echo 4 > /proc/PID/clear_refs - -To reset the peak resident set size ("high water mark") to the process's -current value: - > echo 5 > /proc/PID/clear_refs - -Any other value written to /proc/PID/clear_refs will have no effect. - -The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags -using /proc/kpageflags and number of times a page is mapped using -/proc/kpagecount. For detailed explanation, see -Documentation/admin-guide/mm/pagemap.rst. - -The /proc/pid/numa_maps is an extension based on maps, showing the memory -locality and binding policy, as well as the memory usage (in pages) of -each mapping. The output follows a general format where mapping details get -summarized separated by blank spaces, one mapping per each file line: - -address policy mapping details - -00400000 default file=/usr/local/bin/app mapped=1 active=0 N3=1 kernelpagesize_kB=4 -00600000 default file=/usr/local/bin/app anon=1 dirty=1 N3=1 kernelpagesize_kB=4 -3206000000 default file=/lib64/ld-2.12.so mapped=26 mapmax=6 N0=24 N3=2 kernelpagesize_kB=4 -320621f000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4 -3206220000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4 -3206221000 default anon=1 dirty=1 N3=1 kernelpagesize_kB=4 -3206800000 default file=/lib64/libc-2.12.so mapped=59 mapmax=21 active=55 N0=41 N3=18 kernelpagesize_kB=4 -320698b000 default file=/lib64/libc-2.12.so -3206b8a000 default file=/lib64/libc-2.12.so anon=2 dirty=2 N3=2 kernelpagesize_kB=4 -3206b8e000 default file=/lib64/libc-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4 -3206b8f000 default anon=3 dirty=3 active=1 N3=3 kernelpagesize_kB=4 -7f4dc10a2000 default anon=3 dirty=3 N3=3 kernelpagesize_kB=4 -7f4dc10b4000 default anon=2 dirty=2 active=1 N3=2 kernelpagesize_kB=4 -7f4dc1200000 default file=/anon_hugepage\040(deleted) huge anon=1 dirty=1 N3=1 kernelpagesize_kB=2048 -7fff335f0000 default stack anon=3 dirty=3 N3=3 kernelpagesize_kB=4 -7fff3369d000 default mapped=1 mapmax=35 active=0 N3=1 kernelpagesize_kB=4 - -Where: -"address" is the starting address for the mapping; -"policy" reports the NUMA memory policy set for the mapping (see Documentation/admin-guide/mm/numa_memory_policy.rst); -"mapping details" summarizes mapping data such as mapping type, page usage counters, -node locality page counters (N0 == node0, N1 == node1, ...) and the kernel page -size, in KB, that is backing the mapping up. - -1.2 Kernel data ---------------- - -Similar to the process entries, the kernel data files give information about -the running kernel. The files used to obtain this information are contained in -/proc and are listed in Table 1-5. Not all of these will be present in your -system. It depends on the kernel configuration and the loaded modules, which -files are there, and which are missing. - -Table 1-5: Kernel info in /proc -.............................................................................. - File Content - apm Advanced power management info - buddyinfo Kernel memory allocator information (see text) (2.5) - bus Directory containing bus specific information - cmdline Kernel command line - cpuinfo Info about the CPU - devices Available devices (block and character) - dma Used DMS channels - filesystems Supported filesystems - driver Various drivers grouped here, currently rtc (2.4) - execdomains Execdomains, related to security (2.4) - fb Frame Buffer devices (2.4) - fs File system parameters, currently nfs/exports (2.4) - ide Directory containing info about the IDE subsystem - interrupts Interrupt usage - iomem Memory map (2.4) - ioports I/O port usage - irq Masks for irq to cpu affinity (2.4)(smp?) - isapnp ISA PnP (Plug&Play) Info (2.4) - kcore Kernel core image (can be ELF or A.OUT(deprecated in 2.4)) - kmsg Kernel messages - ksyms Kernel symbol table - loadavg Load average of last 1, 5 & 15 minutes - locks Kernel locks - meminfo Memory info - misc Miscellaneous - modules List of loaded modules - mounts Mounted filesystems - net Networking info (see text) - pagetypeinfo Additional page allocator information (see text) (2.5) - partitions Table of partitions known to the system - pci Deprecated info of PCI bus (new way -> /proc/bus/pci/, - decoupled by lspci (2.4) - rtc Real time clock - scsi SCSI info (see text) - slabinfo Slab pool info - softirqs softirq usage - stat Overall statistics - swaps Swap space utilization - sys See chapter 2 - sysvipc Info of SysVIPC Resources (msg, sem, shm) (2.4) - tty Info of tty drivers - uptime Wall clock since boot, combined idle time of all cpus - version Kernel version - video bttv info of video resources (2.4) - vmallocinfo Show vmalloced areas -.............................................................................. - -You can, for example, check which interrupts are currently in use and what -they are used for by looking in the file /proc/interrupts: - - > cat /proc/interrupts - CPU0 - 0: 8728810 XT-PIC timer - 1: 895 XT-PIC keyboard - 2: 0 XT-PIC cascade - 3: 531695 XT-PIC aha152x - 4: 2014133 XT-PIC serial - 5: 44401 XT-PIC pcnet_cs - 8: 2 XT-PIC rtc - 11: 8 XT-PIC i82365 - 12: 182918 XT-PIC PS/2 Mouse - 13: 1 XT-PIC fpu - 14: 1232265 XT-PIC ide0 - 15: 7 XT-PIC ide1 - NMI: 0 - -In 2.4.* a couple of lines where added to this file LOC & ERR (this time is the -output of a SMP machine): - - > cat /proc/interrupts - - CPU0 CPU1 - 0: 1243498 1214548 IO-APIC-edge timer - 1: 8949 8958 IO-APIC-edge keyboard - 2: 0 0 XT-PIC cascade - 5: 11286 10161 IO-APIC-edge soundblaster - 8: 1 0 IO-APIC-edge rtc - 9: 27422 27407 IO-APIC-edge 3c503 - 12: 113645 113873 IO-APIC-edge PS/2 Mouse - 13: 0 0 XT-PIC fpu - 14: 22491 24012 IO-APIC-edge ide0 - 15: 2183 2415 IO-APIC-edge ide1 - 17: 30564 30414 IO-APIC-level eth0 - 18: 177 164 IO-APIC-level bttv - NMI: 2457961 2457959 - LOC: 2457882 2457881 - ERR: 2155 - -NMI is incremented in this case because every timer interrupt generates a NMI -(Non Maskable Interrupt) which is used by the NMI Watchdog to detect lockups. - -LOC is the local interrupt counter of the internal APIC of every CPU. - -ERR is incremented in the case of errors in the IO-APIC bus (the bus that -connects the CPUs in a SMP system. This means that an error has been detected, -the IO-APIC automatically retry the transmission, so it should not be a big -problem, but you should read the SMP-FAQ. - -In 2.6.2* /proc/interrupts was expanded again. This time the goal was for -/proc/interrupts to display every IRQ vector in use by the system, not -just those considered 'most important'. The new vectors are: - - THR -- interrupt raised when a machine check threshold counter - (typically counting ECC corrected errors of memory or cache) exceeds - a configurable threshold. Only available on some systems. - - TRM -- a thermal event interrupt occurs when a temperature threshold - has been exceeded for the CPU. This interrupt may also be generated - when the temperature drops back to normal. - - SPU -- a spurious interrupt is some interrupt that was raised then lowered - by some IO device before it could be fully processed by the APIC. Hence - the APIC sees the interrupt but does not know what device it came from. - For this case the APIC will generate the interrupt with a IRQ vector - of 0xff. This might also be generated by chipset bugs. - - RES, CAL, TLB -- rescheduling, call and TLB flush interrupts are - sent from one CPU to another per the needs of the OS. Typically, - their statistics are used by kernel developers and interested users to - determine the occurrence of interrupts of the given type. - -The above IRQ vectors are displayed only when relevant. For example, -the threshold vector does not exist on x86_64 platforms. Others are -suppressed when the system is a uniprocessor. As of this writing, only -i386 and x86_64 platforms support the new IRQ vector displays. - -Of some interest is the introduction of the /proc/irq directory to 2.4. -It could be used to set IRQ to CPU affinity, this means that you can "hook" an -IRQ to only one CPU, or to exclude a CPU of handling IRQs. The contents of the -irq subdir is one subdir for each IRQ, and two files; default_smp_affinity and -prof_cpu_mask. - -For example - > ls /proc/irq/ - 0 10 12 14 16 18 2 4 6 8 prof_cpu_mask - 1 11 13 15 17 19 3 5 7 9 default_smp_affinity - > ls /proc/irq/0/ - smp_affinity - -smp_affinity is a bitmask, in which you can specify which CPUs can handle the -IRQ, you can set it by doing: - - > echo 1 > /proc/irq/10/smp_affinity - -This means that only the first CPU will handle the IRQ, but you can also echo -5 which means that only the first and third CPU can handle the IRQ. - -The contents of each smp_affinity file is the same by default: - - > cat /proc/irq/0/smp_affinity - ffffffff - -There is an alternate interface, smp_affinity_list which allows specifying -a cpu range instead of a bitmask: - - > cat /proc/irq/0/smp_affinity_list - 1024-1031 - -The default_smp_affinity mask applies to all non-active IRQs, which are the -IRQs which have not yet been allocated/activated, and hence which lack a -/proc/irq/[0-9]* directory. - -The node file on an SMP system shows the node to which the device using the IRQ -reports itself as being attached. This hardware locality information does not -include information about any possible driver locality preference. - -prof_cpu_mask specifies which CPUs are to be profiled by the system wide -profiler. Default value is ffffffff (all cpus if there are only 32 of them). - -The way IRQs are routed is handled by the IO-APIC, and it's Round Robin -between all the CPUs which are allowed to handle it. As usual the kernel has -more info than you and does a better job than you, so the defaults are the -best choice for almost everyone. [Note this applies only to those IO-APIC's -that support "Round Robin" interrupt distribution.] - -There are three more important subdirectories in /proc: net, scsi, and sys. -The general rule is that the contents, or even the existence of these -directories, depend on your kernel configuration. If SCSI is not enabled, the -directory scsi may not exist. The same is true with the net, which is there -only when networking support is present in the running kernel. - -The slabinfo file gives information about memory usage at the slab level. -Linux uses slab pools for memory management above page level in version 2.2. -Commonly used objects have their own slab pool (such as network buffers, -directory cache, and so on). - -.............................................................................. - -> cat /proc/buddyinfo - -Node 0, zone DMA 0 4 5 4 4 3 ... -Node 0, zone Normal 1 0 0 1 101 8 ... -Node 0, zone HighMem 2 0 0 1 1 0 ... - -External fragmentation is a problem under some workloads, and buddyinfo is a -useful tool for helping diagnose these problems. Buddyinfo will give you a -clue as to how big an area you can safely allocate, or why a previous -allocation failed. - -Each column represents the number of pages of a certain order which are -available. In this case, there are 0 chunks of 2^0*PAGE_SIZE available in -ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE -available in ZONE_NORMAL, etc... - -More information relevant to external fragmentation can be found in -pagetypeinfo. - -> cat /proc/pagetypeinfo -Page block order: 9 -Pages per block: 512 - -Free pages count per migrate type at order 0 1 2 3 4 5 6 7 8 9 10 -Node 0, zone DMA, type Unmovable 0 0 0 1 1 1 1 1 1 1 0 -Node 0, zone DMA, type Reclaimable 0 0 0 0 0 0 0 0 0 0 0 -Node 0, zone DMA, type Movable 1 1 2 1 2 1 1 0 1 0 2 -Node 0, zone DMA, type Reserve 0 0 0 0 0 0 0 0 0 1 0 -Node 0, zone DMA, type Isolate 0 0 0 0 0 0 0 0 0 0 0 -Node 0, zone DMA32, type Unmovable 103 54 77 1 1 1 11 8 7 1 9 -Node 0, zone DMA32, type Reclaimable 0 0 2 1 0 0 0 0 1 0 0 -Node 0, zone DMA32, type Movable 169 152 113 91 77 54 39 13 6 1 452 -Node 0, zone DMA32, type Reserve 1 2 2 2 2 0 1 1 1 1 0 -Node 0, zone DMA32, type Isolate 0 0 0 0 0 0 0 0 0 0 0 - -Number of blocks type Unmovable Reclaimable Movable Reserve Isolate -Node 0, zone DMA 2 0 5 1 0 -Node 0, zone DMA32 41 6 967 2 0 - -Fragmentation avoidance in the kernel works by grouping pages of different -migrate types into the same contiguous regions of memory called page blocks. -A page block is typically the size of the default hugepage size e.g. 2MB on -X86-64. By keeping pages grouped based on their ability to move, the kernel -can reclaim pages within a page block to satisfy a high-order allocation. - -The pagetypinfo begins with information on the size of a page block. It -then gives the same type of information as buddyinfo except broken down -by migrate-type and finishes with details on how many page blocks of each -type exist. - -If min_free_kbytes has been tuned correctly (recommendations made by hugeadm -from libhugetlbfs https://github.com/libhugetlbfs/libhugetlbfs/), one can -make an estimate of the likely number of huge pages that can be allocated -at a given point in time. All the "Movable" blocks should be allocatable -unless memory has been mlock()'d. Some of the Reclaimable blocks should -also be allocatable although a lot of filesystem metadata may have to be -reclaimed to achieve this. - -.............................................................................. - -meminfo: - -Provides information about distribution and utilization of memory. This -varies by architecture and compile options. The following is from a -16GB PIII, which has highmem enabled. You may not have all of these fields. - -> cat /proc/meminfo - -MemTotal: 16344972 kB -MemFree: 13634064 kB -MemAvailable: 14836172 kB -Buffers: 3656 kB -Cached: 1195708 kB -SwapCached: 0 kB -Active: 891636 kB -Inactive: 1077224 kB -HighTotal: 15597528 kB -HighFree: 13629632 kB -LowTotal: 747444 kB -LowFree: 4432 kB -SwapTotal: 0 kB -SwapFree: 0 kB -Dirty: 968 kB -Writeback: 0 kB -AnonPages: 861800 kB -Mapped: 280372 kB -Shmem: 644 kB -KReclaimable: 168048 kB -Slab: 284364 kB -SReclaimable: 159856 kB -SUnreclaim: 124508 kB -PageTables: 24448 kB -NFS_Unstable: 0 kB -Bounce: 0 kB -WritebackTmp: 0 kB -CommitLimit: 7669796 kB -Committed_AS: 100056 kB -VmallocTotal: 112216 kB -VmallocUsed: 428 kB -VmallocChunk: 111088 kB -Percpu: 62080 kB -HardwareCorrupted: 0 kB -AnonHugePages: 49152 kB -ShmemHugePages: 0 kB -ShmemPmdMapped: 0 kB - - - MemTotal: Total usable ram (i.e. physical ram minus a few reserved - bits and the kernel binary code) - MemFree: The sum of LowFree+HighFree -MemAvailable: An estimate of how much memory is available for starting new - applications, without swapping. Calculated from MemFree, - SReclaimable, the size of the file LRU lists, and the low - watermarks in each zone. - The estimate takes into account that the system needs some - page cache to function well, and that not all reclaimable - slab will be reclaimable, due to items being in use. The - impact of those factors will vary from system to system. - Buffers: Relatively temporary storage for raw disk blocks - shouldn't get tremendously large (20MB or so) - Cached: in-memory cache for files read from the disk (the - pagecache). Doesn't include SwapCached - SwapCached: Memory that once was swapped out, is swapped back in but - still also is in the swapfile (if memory is needed it - doesn't need to be swapped out AGAIN because it is already - in the swapfile. This saves I/O) - Active: Memory that has been used more recently and usually not - reclaimed unless absolutely necessary. - Inactive: Memory which has been less recently used. It is more - eligible to be reclaimed for other purposes - HighTotal: - HighFree: Highmem is all memory above ~860MB of physical memory - Highmem areas are for use by userspace programs, or - for the pagecache. The kernel must use tricks to access - this memory, making it slower to access than lowmem. - LowTotal: - LowFree: Lowmem is memory which can be used for everything that - highmem can be used for, but it is also available for the - kernel's use for its own data structures. Among many - other things, it is where everything from the Slab is - allocated. Bad things happen when you're out of lowmem. - SwapTotal: total amount of swap space available - SwapFree: Memory which has been evicted from RAM, and is temporarily - on the disk - Dirty: Memory which is waiting to get written back to the disk - Writeback: Memory which is actively being written back to the disk - AnonPages: Non-file backed pages mapped into userspace page tables -HardwareCorrupted: The amount of RAM/memory in KB, the kernel identifies as - corrupted. -AnonHugePages: Non-file backed huge pages mapped into userspace page tables - Mapped: files which have been mmaped, such as libraries - Shmem: Total memory used by shared memory (shmem) and tmpfs -ShmemHugePages: Memory used by shared memory (shmem) and tmpfs allocated - with huge pages -ShmemPmdMapped: Shared memory mapped into userspace with huge pages -KReclaimable: Kernel allocations that the kernel will attempt to reclaim - under memory pressure. Includes SReclaimable (below), and other - direct allocations with a shrinker. - Slab: in-kernel data structures cache -SReclaimable: Part of Slab, that might be reclaimed, such as caches - SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure - PageTables: amount of memory dedicated to the lowest level of page - tables. -NFS_Unstable: NFS pages sent to the server, but not yet committed to stable - storage - Bounce: Memory used for block device "bounce buffers" -WritebackTmp: Memory used by FUSE for temporary writeback buffers - CommitLimit: Based on the overcommit ratio ('vm.overcommit_ratio'), - this is the total amount of memory currently available to - be allocated on the system. This limit is only adhered to - if strict overcommit accounting is enabled (mode 2 in - 'vm.overcommit_memory'). - The CommitLimit is calculated with the following formula: - CommitLimit = ([total RAM pages] - [total huge TLB pages]) * - overcommit_ratio / 100 + [total swap pages] - For example, on a system with 1G of physical RAM and 7G - of swap with a `vm.overcommit_ratio` of 30 it would - yield a CommitLimit of 7.3G. - For more details, see the memory overcommit documentation - in vm/overcommit-accounting. -Committed_AS: The amount of memory presently allocated on the system. - The committed memory is a sum of all of the memory which - has been allocated by processes, even if it has not been - "used" by them as of yet. A process which malloc()'s 1G - of memory, but only touches 300M of it will show up as - using 1G. This 1G is memory which has been "committed" to - by the VM and can be used at any time by the allocating - application. With strict overcommit enabled on the system - (mode 2 in 'vm.overcommit_memory'),allocations which would - exceed the CommitLimit (detailed above) will not be permitted. - This is useful if one needs to guarantee that processes will - not fail due to lack of memory once that memory has been - successfully allocated. -VmallocTotal: total size of vmalloc memory area - VmallocUsed: amount of vmalloc area which is used -VmallocChunk: largest contiguous block of vmalloc area which is free - Percpu: Memory allocated to the percpu allocator used to back percpu - allocations. This stat excludes the cost of metadata. - -.............................................................................. - -vmallocinfo: - -Provides information about vmalloced/vmaped areas. One line per area, -containing the virtual address range of the area, size in bytes, -caller information of the creator, and optional information depending -on the kind of area : - - pages=nr number of pages - phys=addr if a physical address was specified - ioremap I/O mapping (ioremap() and friends) - vmalloc vmalloc() area - vmap vmap()ed pages - user VM_USERMAP area - vpages buffer for pages pointers was vmalloced (huge area) - N=nr (Only on NUMA kernels) - Number of pages allocated on memory node - -> cat /proc/vmallocinfo -0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204 ... - /0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128 -0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204 ... - /0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64 -0xffffc20000302000-0xffffc20000304000 8192 acpi_tb_verify_table+0x21/0x4f... - phys=7fee8000 ioremap -0xffffc20000304000-0xffffc20000307000 12288 acpi_tb_verify_table+0x21/0x4f... - phys=7fee7000 ioremap -0xffffc2000031d000-0xffffc2000031f000 8192 init_vdso_vars+0x112/0x210 -0xffffc2000031f000-0xffffc2000032b000 49152 cramfs_uncompress_init+0x2e ... - /0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3 -0xffffc2000033a000-0xffffc2000033d000 12288 sys_swapon+0x640/0xac0 ... - pages=2 vmalloc N1=2 -0xffffc20000347000-0xffffc2000034c000 20480 xt_alloc_table_info+0xfe ... - /0x130 [x_tables] pages=4 vmalloc N0=4 -0xffffffffa0000000-0xffffffffa000f000 61440 sys_init_module+0xc27/0x1d00 ... - pages=14 vmalloc N2=14 -0xffffffffa000f000-0xffffffffa0014000 20480 sys_init_module+0xc27/0x1d00 ... - pages=4 vmalloc N1=4 -0xffffffffa0014000-0xffffffffa0017000 12288 sys_init_module+0xc27/0x1d00 ... - pages=2 vmalloc N1=2 -0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ... - pages=10 vmalloc N0=10 - -.............................................................................. - -softirqs: - -Provides counts of softirq handlers serviced since boot time, for each cpu. - -> cat /proc/softirqs - CPU0 CPU1 CPU2 CPU3 - HI: 0 0 0 0 - TIMER: 27166 27120 27097 27034 - NET_TX: 0 0 0 17 - NET_RX: 42 0 0 39 - BLOCK: 0 0 107 1121 - TASKLET: 0 0 0 290 - SCHED: 27035 26983 26971 26746 - HRTIMER: 0 0 0 0 - RCU: 1678 1769 2178 2250 - - -1.3 IDE devices in /proc/ide ----------------------------- - -The subdirectory /proc/ide contains information about all IDE devices of which -the kernel is aware. There is one subdirectory for each IDE controller, the -file drivers and a link for each IDE device, pointing to the device directory -in the controller specific subtree. - -The file drivers contains general information about the drivers used for the -IDE devices: - - > cat /proc/ide/drivers - ide-cdrom version 4.53 - ide-disk version 1.08 - -More detailed information can be found in the controller specific -subdirectories. These are named ide0, ide1 and so on. Each of these -directories contains the files shown in table 1-6. - - -Table 1-6: IDE controller info in /proc/ide/ide? -.............................................................................. - File Content - channel IDE channel (0 or 1) - config Configuration (only for PCI/IDE bridge) - mate Mate name - model Type/Chipset of IDE controller -.............................................................................. - -Each device connected to a controller has a separate subdirectory in the -controllers directory. The files listed in table 1-7 are contained in these -directories. - - -Table 1-7: IDE device information -.............................................................................. - File Content - cache The cache - capacity Capacity of the medium (in 512Byte blocks) - driver driver and version - geometry physical and logical geometry - identify device identify block - media media type - model device identifier - settings device setup - smart_thresholds IDE disk management thresholds - smart_values IDE disk management values -.............................................................................. - -The most interesting file is settings. This file contains a nice overview of -the drive parameters: - - # cat /proc/ide/ide0/hda/settings - name value min max mode - ---- ----- --- --- ---- - bios_cyl 526 0 65535 rw - bios_head 255 0 255 rw - bios_sect 63 0 63 rw - breada_readahead 4 0 127 rw - bswap 0 0 1 r - file_readahead 72 0 2097151 rw - io_32bit 0 0 3 rw - keepsettings 0 0 1 rw - max_kb_per_request 122 1 127 rw - multcount 0 0 8 rw - nice1 1 0 1 rw - nowerr 0 0 1 rw - pio_mode write-only 0 255 w - slow 0 0 1 rw - unmaskirq 0 0 1 rw - using_dma 0 0 1 rw - - -1.4 Networking info in /proc/net --------------------------------- - -The subdirectory /proc/net follows the usual pattern. Table 1-8 shows the -additional values you get for IP version 6 if you configure the kernel to -support this. Table 1-9 lists the files and their meaning. - - -Table 1-8: IPv6 info in /proc/net -.............................................................................. - File Content - udp6 UDP sockets (IPv6) - tcp6 TCP sockets (IPv6) - raw6 Raw device statistics (IPv6) - igmp6 IP multicast addresses, which this host joined (IPv6) - if_inet6 List of IPv6 interface addresses - ipv6_route Kernel routing table for IPv6 - rt6_stats Global IPv6 routing tables statistics - sockstat6 Socket statistics (IPv6) - snmp6 Snmp data (IPv6) -.............................................................................. - - -Table 1-9: Network info in /proc/net -.............................................................................. - File Content - arp Kernel ARP table - dev network devices with statistics - dev_mcast the Layer2 multicast groups a device is listening too - (interface index, label, number of references, number of bound - addresses). - dev_stat network device status - ip_fwchains Firewall chain linkage - ip_fwnames Firewall chain names - ip_masq Directory containing the masquerading tables - ip_masquerade Major masquerading table - netstat Network statistics - raw raw device statistics - route Kernel routing table - rpc Directory containing rpc info - rt_cache Routing cache - snmp SNMP data - sockstat Socket statistics - tcp TCP sockets - udp UDP sockets - unix UNIX domain sockets - wireless Wireless interface data (Wavelan etc) - igmp IP multicast addresses, which this host joined - psched Global packet scheduler parameters. - netlink List of PF_NETLINK sockets - ip_mr_vifs List of multicast virtual interfaces - ip_mr_cache List of multicast routing cache -.............................................................................. - -You can use this information to see which network devices are available in -your system and how much traffic was routed over those devices: - - > cat /proc/net/dev - Inter-|Receive |[... - face |bytes packets errs drop fifo frame compressed multicast|[... - lo: 908188 5596 0 0 0 0 0 0 [... - ppp0:15475140 20721 410 0 0 410 0 0 [... - eth0: 614530 7085 0 0 0 0 0 1 [... - - ...] Transmit - ...] bytes packets errs drop fifo colls carrier compressed - ...] 908188 5596 0 0 0 0 0 0 - ...] 1375103 17405 0 0 0 0 0 0 - ...] 1703981 5535 0 0 0 3 0 0 - -In addition, each Channel Bond interface has its own directory. For -example, the bond0 device will have a directory called /proc/net/bond0/. -It will contain information that is specific to that bond, such as the -current slaves of the bond, the link status of the slaves, and how -many times the slaves link has failed. - -1.5 SCSI info -------------- - -If you have a SCSI host adapter in your system, you'll find a subdirectory -named after the driver for this adapter in /proc/scsi. You'll also see a list -of all recognized SCSI devices in /proc/scsi: - - >cat /proc/scsi/scsi - Attached devices: - Host: scsi0 Channel: 00 Id: 00 Lun: 00 - Vendor: IBM Model: DGHS09U Rev: 03E0 - Type: Direct-Access ANSI SCSI revision: 03 - Host: scsi0 Channel: 00 Id: 06 Lun: 00 - Vendor: PIONEER Model: CD-ROM DR-U06S Rev: 1.04 - Type: CD-ROM ANSI SCSI revision: 02 - - -The directory named after the driver has one file for each adapter found in -the system. These files contain information about the controller, including -the used IRQ and the IO address range. The amount of information shown is -dependent on the adapter you use. The example shows the output for an Adaptec -AHA-2940 SCSI adapter: - - > cat /proc/scsi/aic7xxx/0 - - Adaptec AIC7xxx driver version: 5.1.19/3.2.4 - Compile Options: - TCQ Enabled By Default : Disabled - AIC7XXX_PROC_STATS : Disabled - AIC7XXX_RESET_DELAY : 5 - Adapter Configuration: - SCSI Adapter: Adaptec AHA-294X Ultra SCSI host adapter - Ultra Wide Controller - PCI MMAPed I/O Base: 0xeb001000 - Adapter SEEPROM Config: SEEPROM found and used. - Adaptec SCSI BIOS: Enabled - IRQ: 10 - SCBs: Active 0, Max Active 2, - Allocated 15, HW 16, Page 255 - Interrupts: 160328 - BIOS Control Word: 0x18b6 - Adapter Control Word: 0x005b - Extended Translation: Enabled - Disconnect Enable Flags: 0xffff - Ultra Enable Flags: 0x0001 - Tag Queue Enable Flags: 0x0000 - Ordered Queue Tag Flags: 0x0000 - Default Tag Queue Depth: 8 - Tagged Queue By Device array for aic7xxx host instance 0: - {255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255} - Actual queue depth per device for aic7xxx host instance 0: - {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1} - Statistics: - (scsi0:0:0:0) - Device using Wide/Sync transfers at 40.0 MByte/sec, offset 8 - Transinfo settings: current(12/8/1/0), goal(12/8/1/0), user(12/15/1/0) - Total transfers 160151 (74577 reads and 85574 writes) - (scsi0:0:6:0) - Device using Narrow/Sync transfers at 5.0 MByte/sec, offset 15 - Transinfo settings: current(50/15/0/0), goal(50/15/0/0), user(50/15/0/0) - Total transfers 0 (0 reads and 0 writes) - - -1.6 Parallel port info in /proc/parport ---------------------------------------- - -The directory /proc/parport contains information about the parallel ports of -your system. It has one subdirectory for each port, named after the port -number (0,1,2,...). - -These directories contain the four files shown in Table 1-10. - - -Table 1-10: Files in /proc/parport -.............................................................................. - File Content - autoprobe Any IEEE-1284 device ID information that has been acquired. - devices list of the device drivers using that port. A + will appear by the - name of the device currently using the port (it might not appear - against any). - hardware Parallel port's base address, IRQ line and DMA channel. - irq IRQ that parport is using for that port. This is in a separate - file to allow you to alter it by writing a new value in (IRQ - number or none). -.............................................................................. - -1.7 TTY info in /proc/tty -------------------------- - -Information about the available and actually used tty's can be found in the -directory /proc/tty.You'll find entries for drivers and line disciplines in -this directory, as shown in Table 1-11. - - -Table 1-11: Files in /proc/tty -.............................................................................. - File Content - drivers list of drivers and their usage - ldiscs registered line disciplines - driver/serial usage statistic and status of single tty lines -.............................................................................. - -To see which tty's are currently in use, you can simply look into the file -/proc/tty/drivers: - - > cat /proc/tty/drivers - pty_slave /dev/pts 136 0-255 pty:slave - pty_master /dev/ptm 128 0-255 pty:master - pty_slave /dev/ttyp 3 0-255 pty:slave - pty_master /dev/pty 2 0-255 pty:master - serial /dev/cua 5 64-67 serial:callout - serial /dev/ttyS 4 64-67 serial - /dev/tty0 /dev/tty0 4 0 system:vtmaster - /dev/ptmx /dev/ptmx 5 2 system - /dev/console /dev/console 5 1 system:console - /dev/tty /dev/tty 5 0 system:/dev/tty - unknown /dev/tty 4 1-63 console - - -1.8 Miscellaneous kernel statistics in /proc/stat -------------------------------------------------- - -Various pieces of information about kernel activity are available in the -/proc/stat file. All of the numbers reported in this file are aggregates -since the system first booted. For a quick look, simply cat the file: - - > cat /proc/stat - cpu 2255 34 2290 22625563 6290 127 456 0 0 0 - cpu0 1132 34 1441 11311718 3675 127 438 0 0 0 - cpu1 1123 0 849 11313845 2614 0 18 0 0 0 - intr 114930548 113199788 3 0 5 263 0 4 [... lots more numbers ...] - ctxt 1990473 - btime 1062191376 - processes 2915 - procs_running 1 - procs_blocked 0 - softirq 183433 0 21755 12 39 1137 231 21459 2263 - -The very first "cpu" line aggregates the numbers in all of the other "cpuN" -lines. These numbers identify the amount of time the CPU has spent performing -different kinds of work. Time units are in USER_HZ (typically hundredths of a -second). The meanings of the columns are as follows, from left to right: - -- user: normal processes executing in user mode -- nice: niced processes executing in user mode -- system: processes executing in kernel mode -- idle: twiddling thumbs -- iowait: In a word, iowait stands for waiting for I/O to complete. But there - are several problems: - 1. Cpu will not wait for I/O to complete, iowait is the time that a task is - waiting for I/O to complete. When cpu goes into idle state for - outstanding task io, another task will be scheduled on this CPU. - 2. In a multi-core CPU, the task waiting for I/O to complete is not running - on any CPU, so the iowait of each CPU is difficult to calculate. - 3. The value of iowait field in /proc/stat will decrease in certain - conditions. - So, the iowait is not reliable by reading from /proc/stat. -- irq: servicing interrupts -- softirq: servicing softirqs -- steal: involuntary wait -- guest: running a normal guest -- guest_nice: running a niced guest - -The "intr" line gives counts of interrupts serviced since boot time, for each -of the possible system interrupts. The first column is the total of all -interrupts serviced including unnumbered architecture specific interrupts; -each subsequent column is the total for that particular numbered interrupt. -Unnumbered interrupts are not shown, only summed into the total. - -The "ctxt" line gives the total number of context switches across all CPUs. - -The "btime" line gives the time at which the system booted, in seconds since -the Unix epoch. - -The "processes" line gives the number of processes and threads created, which -includes (but is not limited to) those created by calls to the fork() and -clone() system calls. - -The "procs_running" line gives the total number of threads that are -running or ready to run (i.e., the total number of runnable threads). - -The "procs_blocked" line gives the number of processes currently blocked, -waiting for I/O to complete. - -The "softirq" line gives counts of softirqs serviced since boot time, for each -of the possible system softirqs. The first column is the total of all -softirqs serviced; each subsequent column is the total for that particular -softirq. - - -1.9 Ext4 file system parameters -------------------------------- - -Information about mounted ext4 file systems can be found in -/proc/fs/ext4. Each mounted filesystem will have a directory in -/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or -/proc/fs/ext4/dm-0). The files in each per-device directory are shown -in Table 1-12, below. - -Table 1-12: Files in /proc/fs/ext4/ -.............................................................................. - File Content - mb_groups details of multiblock allocator buddy cache of free blocks -.............................................................................. - -2.0 /proc/consoles ------------------- -Shows registered system console lines. - -To see which character device lines are currently used for the system console -/dev/console, you may simply look into the file /proc/consoles: - - > cat /proc/consoles - tty0 -WU (ECp) 4:7 - ttyS0 -W- (Ep) 4:64 - -The columns are: - - device name of the device - operations R = can do read operations - W = can do write operations - U = can do unblank - flags E = it is enabled - C = it is preferred console - B = it is primary boot console - p = it is used for printk buffer - b = it is not a TTY but a Braille device - a = it is safe to use when cpu is offline - major:minor major and minor number of the device separated by a colon - ------------------------------------------------------------------------------- -Summary ------------------------------------------------------------------------------- -The /proc file system serves information about the running system. It not only -allows access to process data but also allows you to request the kernel status -by reading files in the hierarchy. - -The directory structure of /proc reflects the types of information and makes -it easy, if not obvious, where to look for specific data. ------------------------------------------------------------------------------- - ------------------------------------------------------------------------------- -CHAPTER 2: MODIFYING SYSTEM PARAMETERS ------------------------------------------------------------------------------- - ------------------------------------------------------------------------------- -In This Chapter ------------------------------------------------------------------------------- -* Modifying kernel parameters by writing into files found in /proc/sys -* Exploring the files which modify certain parameters -* Review of the /proc/sys file tree ------------------------------------------------------------------------------- - - -A very interesting part of /proc is the directory /proc/sys. This is not only -a source of information, it also allows you to change parameters within the -kernel. Be very careful when attempting this. You can optimize your system, -but you can also cause it to crash. Never alter kernel parameters on a -production system. Set up a development machine and test to make sure that -everything works the way you want it to. You may have no alternative but to -reboot the machine once an error has been made. - -To change a value, simply echo the new value into the file. An example is -given below in the section on the file system data. You need to be root to do -this. You can create your own boot script to perform this every time your -system boots. - -The files in /proc/sys can be used to fine tune and monitor miscellaneous and -general things in the operation of the Linux kernel. Since some of the files -can inadvertently disrupt your system, it is advisable to read both -documentation and source before actually making adjustments. In any case, be -very careful when writing to any of these files. The entries in /proc may -change slightly between the 2.1.* and the 2.2 kernel, so if there is any doubt -review the kernel documentation in the directory /usr/src/linux/Documentation. -This chapter is heavily based on the documentation included in the pre 2.2 -kernels, and became part of it in version 2.2.1 of the Linux kernel. - -Please see: Documentation/admin-guide/sysctl/ directory for descriptions of these -entries. - ------------------------------------------------------------------------------- -Summary ------------------------------------------------------------------------------- -Certain aspects of kernel behavior can be modified at runtime, without the -need to recompile the kernel, or even to reboot the system. The files in the -/proc/sys tree can not only be read, but also modified. You can use the echo -command to write value into these files, thereby changing the default settings -of the kernel. ------------------------------------------------------------------------------- - ------------------------------------------------------------------------------- -CHAPTER 3: PER-PROCESS PARAMETERS ------------------------------------------------------------------------------- - -3.1 /proc//oom_adj & /proc//oom_score_adj- Adjust the oom-killer score --------------------------------------------------------------------------------- - -These file can be used to adjust the badness heuristic used to select which -process gets killed in out of memory conditions. - -The badness heuristic assigns a value to each candidate task ranging from 0 -(never kill) to 1000 (always kill) to determine which process is targeted. The -units are roughly a proportion along that range of allowed memory the process -may allocate from based on an estimation of its current memory and swap use. -For example, if a task is using all allowed memory, its badness score will be -1000. If it is using half of its allowed memory, its score will be 500. - -There is an additional factor included in the badness score: the current memory -and swap usage is discounted by 3% for root processes. - -The amount of "allowed" memory depends on the context in which the oom killer -was called. If it is due to the memory assigned to the allocating task's cpuset -being exhausted, the allowed memory represents the set of mems assigned to that -cpuset. If it is due to a mempolicy's node(s) being exhausted, the allowed -memory represents the set of mempolicy nodes. If it is due to a memory -limit (or swap limit) being reached, the allowed memory is that configured -limit. Finally, if it is due to the entire system being out of memory, the -allowed memory represents all allocatable resources. - -The value of /proc//oom_score_adj is added to the badness score before it -is used to determine which task to kill. Acceptable values range from -1000 -(OOM_SCORE_ADJ_MIN) to +1000 (OOM_SCORE_ADJ_MAX). This allows userspace to -polarize the preference for oom killing either by always preferring a certain -task or completely disabling it. The lowest possible value, -1000, is -equivalent to disabling oom killing entirely for that task since it will always -report a badness score of 0. - -Consequently, it is very simple for userspace to define the amount of memory to -consider for each task. Setting a /proc//oom_score_adj value of +500, for -example, is roughly equivalent to allowing the remainder of tasks sharing the -same system, cpuset, mempolicy, or memory controller resources to use at least -50% more memory. A value of -500, on the other hand, would be roughly -equivalent to discounting 50% of the task's allowed memory from being considered -as scoring against the task. - -For backwards compatibility with previous kernels, /proc//oom_adj may also -be used to tune the badness score. Its acceptable values range from -16 -(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17 -(OOM_DISABLE) to disable oom killing entirely for that task. Its value is -scaled linearly with /proc//oom_score_adj. - -The value of /proc//oom_score_adj may be reduced no lower than the last -value set by a CAP_SYS_RESOURCE process. To reduce the value any lower -requires CAP_SYS_RESOURCE. - -Caveat: when a parent task is selected, the oom killer will sacrifice any first -generation children with separate address spaces instead, if possible. This -avoids servers and important system daemons from being killed and loses the -minimal amount of work. - - -3.2 /proc//oom_score - Display current oom-killer score -------------------------------------------------------------- - -This file can be used to check the current score used by the oom-killer is for -any given . Use it together with /proc//oom_score_adj to tune which -process should be killed in an out-of-memory situation. - - -3.3 /proc//io - Display the IO accounting fields -------------------------------------------------------- - -This file contains IO statistics for each running process - -Example -------- - -test:/tmp # dd if=/dev/zero of=/tmp/test.dat & -[1] 3828 - -test:/tmp # cat /proc/3828/io -rchar: 323934931 -wchar: 323929600 -syscr: 632687 -syscw: 632675 -read_bytes: 0 -write_bytes: 323932160 -cancelled_write_bytes: 0 - - -Description ------------ - -rchar ------ - -I/O counter: chars read -The number of bytes which this task has caused to be read from storage. This -is simply the sum of bytes which this process passed to read() and pread(). -It includes things like tty IO and it is unaffected by whether or not actual -physical disk IO was required (the read might have been satisfied from -pagecache) - - -wchar ------ - -I/O counter: chars written -The number of bytes which this task has caused, or shall cause to be written -to disk. Similar caveats apply here as with rchar. - - -syscr ------ - -I/O counter: read syscalls -Attempt to count the number of read I/O operations, i.e. syscalls like read() -and pread(). - - -syscw ------ - -I/O counter: write syscalls -Attempt to count the number of write I/O operations, i.e. syscalls like -write() and pwrite(). - - -read_bytes ----------- - -I/O counter: bytes read -Attempt to count the number of bytes which this process really did cause to -be fetched from the storage layer. Done at the submit_bio() level, so it is -accurate for block-backed filesystems. - - -write_bytes ------------ - -I/O counter: bytes written -Attempt to count the number of bytes which this process caused to be sent to -the storage layer. This is done at page-dirtying time. - - -cancelled_write_bytes ---------------------- - -The big inaccuracy here is truncate. If a process writes 1MB to a file and -then deletes the file, it will in fact perform no writeout. But it will have -been accounted as having caused 1MB of write. -In other words: The number of bytes which this process caused to not happen, -by truncating pagecache. A task can cause "negative" IO too. If this task -truncates some dirty pagecache, some IO which another task has been accounted -for (in its write_bytes) will not be happening. We _could_ just subtract that -from the truncating task's write_bytes, but there is information loss in doing -that. - - -Note ----- - -At its current implementation state, this is a bit racy on 32-bit machines: if -process A reads process B's /proc/pid/io while process B is updating one of -those 64-bit counters, process A could see an intermediate result. - - -More information about this can be found within the taskstats documentation in -Documentation/accounting. - -3.4 /proc//coredump_filter - Core dump filtering settings ---------------------------------------------------------------- -When a process is dumped, all anonymous memory is written to a core file as -long as the size of the core file isn't limited. But sometimes we don't want -to dump some memory segments, for example, huge shared memory or DAX. -Conversely, sometimes we want to save file-backed memory segments into a core -file, not only the individual files. - -/proc//coredump_filter allows you to customize which memory segments -will be dumped when the process is dumped. coredump_filter is a bitmask -of memory types. If a bit of the bitmask is set, memory segments of the -corresponding memory type are dumped, otherwise they are not dumped. - -The following 9 memory types are supported: - - (bit 0) anonymous private memory - - (bit 1) anonymous shared memory - - (bit 2) file-backed private memory - - (bit 3) file-backed shared memory - - (bit 4) ELF header pages in file-backed private memory areas (it is - effective only if the bit 2 is cleared) - - (bit 5) hugetlb private memory - - (bit 6) hugetlb shared memory - - (bit 7) DAX private memory - - (bit 8) DAX shared memory - - Note that MMIO pages such as frame buffer are never dumped and vDSO pages - are always dumped regardless of the bitmask status. - - Note that bits 0-4 don't affect hugetlb or DAX memory. hugetlb memory is - only affected by bit 5-6, and DAX is only affected by bits 7-8. - -The default value of coredump_filter is 0x33; this means all anonymous memory -segments, ELF header pages and hugetlb private memory are dumped. - -If you don't want to dump all shared memory segments attached to pid 1234, -write 0x31 to the process's proc file. - - $ echo 0x31 > /proc/1234/coredump_filter - -When a new process is created, the process inherits the bitmask status from its -parent. It is useful to set up coredump_filter before the program runs. -For example: - - $ echo 0x7 > /proc/self/coredump_filter - $ ./some_program - -3.5 /proc//mountinfo - Information about mounts --------------------------------------------------------- - -This file contains lines of the form: - -36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue -(1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11) - -(1) mount ID: unique identifier of the mount (may be reused after umount) -(2) parent ID: ID of parent (or of self for the top of the mount tree) -(3) major:minor: value of st_dev for files on filesystem -(4) root: root of the mount within the filesystem -(5) mount point: mount point relative to the process's root -(6) mount options: per mount options -(7) optional fields: zero or more fields of the form "tag[:value]" -(8) separator: marks the end of the optional fields -(9) filesystem type: name of filesystem of the form "type[.subtype]" -(10) mount source: filesystem specific information or "none" -(11) super options: per super block options - -Parsers should ignore all unrecognised optional fields. Currently the -possible optional fields are: - -shared:X mount is shared in peer group X -master:X mount is slave to peer group X -propagate_from:X mount is slave and receives propagation from peer group X (*) -unbindable mount is unbindable - -(*) X is the closest dominant peer group under the process's root. If -X is the immediate master of the mount, or if there's no dominant peer -group under the same root, then only the "master:X" field is present -and not the "propagate_from:X" field. - -For more information on mount propagation see: - - Documentation/filesystems/sharedsubtree.txt - - -3.6 /proc//comm & /proc//task//comm --------------------------------------------------------- -These files provide a method to access a tasks comm value. It also allows for -a task to set its own or one of its thread siblings comm value. The comm value -is limited in size compared to the cmdline value, so writing anything longer -then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated -comm value. - - -3.7 /proc//task//children - Information about task children -------------------------------------------------------------------------- -This file provides a fast way to retrieve first level children pids -of a task pointed by / pair. The format is a space separated -stream of pids. - -Note the "first level" here -- if a child has own children they will -not be listed here, one needs to read /proc//task//children -to obtain the descendants. - -Since this interface is intended to be fast and cheap it doesn't -guarantee to provide precise results and some children might be -skipped, especially if they've exited right after we printed their -pids, so one need to either stop or freeze processes being inspected -if precise results are needed. - - -3.8 /proc//fdinfo/ - Information about opened file ---------------------------------------------------------------- -This file provides information associated with an opened file. The regular -files have at least three fields -- 'pos', 'flags' and mnt_id. The 'pos' -represents the current offset of the opened file in decimal form [see lseek(2) -for details], 'flags' denotes the octal O_xxx mask the file has been -created with [see open(2) for details] and 'mnt_id' represents mount ID of -the file system containing the opened file [see 3.5 /proc//mountinfo -for details]. - -A typical output is - - pos: 0 - flags: 0100002 - mnt_id: 19 - -All locks associated with a file descriptor are shown in its fdinfo too. - -lock: 1: FLOCK ADVISORY WRITE 359 00:13:11691 0 EOF - -The files such as eventfd, fsnotify, signalfd, epoll among the regular pos/flags -pair provide additional information particular to the objects they represent. - - Eventfd files - ~~~~~~~~~~~~~ - pos: 0 - flags: 04002 - mnt_id: 9 - eventfd-count: 5a - - where 'eventfd-count' is hex value of a counter. - - Signalfd files - ~~~~~~~~~~~~~~ - pos: 0 - flags: 04002 - mnt_id: 9 - sigmask: 0000000000000200 - - where 'sigmask' is hex value of the signal mask associated - with a file. - - Epoll files - ~~~~~~~~~~~ - pos: 0 - flags: 02 - mnt_id: 9 - tfd: 5 events: 1d data: ffffffffffffffff pos:0 ino:61af sdev:7 - - where 'tfd' is a target file descriptor number in decimal form, - 'events' is events mask being watched and the 'data' is data - associated with a target [see epoll(7) for more details]. - - The 'pos' is current offset of the target file in decimal form - [see lseek(2)], 'ino' and 'sdev' are inode and device numbers - where target file resides, all in hex format. - - Fsnotify files - ~~~~~~~~~~~~~~ - For inotify files the format is the following - - pos: 0 - flags: 02000000 - inotify wd:3 ino:9e7e sdev:800013 mask:800afce ignored_mask:0 fhandle-bytes:8 fhandle-type:1 f_handle:7e9e0000640d1b6d - - where 'wd' is a watch descriptor in decimal form, ie a target file - descriptor number, 'ino' and 'sdev' are inode and device where the - target file resides and the 'mask' is the mask of events, all in hex - form [see inotify(7) for more details]. - - If the kernel was built with exportfs support, the path to the target - file is encoded as a file handle. The file handle is provided by three - fields 'fhandle-bytes', 'fhandle-type' and 'f_handle', all in hex - format. - - If the kernel is built without exportfs support the file handle won't be - printed out. - - If there is no inotify mark attached yet the 'inotify' line will be omitted. - - For fanotify files the format is - - pos: 0 - flags: 02 - mnt_id: 9 - fanotify flags:10 event-flags:0 - fanotify mnt_id:12 mflags:40 mask:38 ignored_mask:40000003 - fanotify ino:4f969 sdev:800013 mflags:0 mask:3b ignored_mask:40000000 fhandle-bytes:8 fhandle-type:1 f_handle:69f90400c275b5b4 - - where fanotify 'flags' and 'event-flags' are values used in fanotify_init - call, 'mnt_id' is the mount point identifier, 'mflags' is the value of - flags associated with mark which are tracked separately from events - mask. 'ino', 'sdev' are target inode and device, 'mask' is the events - mask and 'ignored_mask' is the mask of events which are to be ignored. - All in hex format. Incorporation of 'mflags', 'mask' and 'ignored_mask' - does provide information about flags and mask used in fanotify_mark - call [see fsnotify manpage for details]. - - While the first three lines are mandatory and always printed, the rest is - optional and may be omitted if no marks created yet. - - Timerfd files - ~~~~~~~~~~~~~ - - pos: 0 - flags: 02 - mnt_id: 9 - clockid: 0 - ticks: 0 - settime flags: 01 - it_value: (0, 49406829) - it_interval: (1, 0) - - where 'clockid' is the clock type and 'ticks' is the number of the timer expirations - that have occurred [see timerfd_create(2) for details]. 'settime flags' are - flags in octal form been used to setup the timer [see timerfd_settime(2) for - details]. 'it_value' is remaining time until the timer exiration. - 'it_interval' is the interval for the timer. Note the timer might be set up - with TIMER_ABSTIME option which will be shown in 'settime flags', but 'it_value' - still exhibits timer's remaining time. - -3.9 /proc//map_files - Information about memory mapped files ---------------------------------------------------------------------- -This directory contains symbolic links which represent memory mapped files -the process is maintaining. Example output: - - | lr-------- 1 root root 64 Jan 27 11:24 333c600000-333c620000 -> /usr/lib64/ld-2.18.so - | lr-------- 1 root root 64 Jan 27 11:24 333c81f000-333c820000 -> /usr/lib64/ld-2.18.so - | lr-------- 1 root root 64 Jan 27 11:24 333c820000-333c821000 -> /usr/lib64/ld-2.18.so - | ... - | lr-------- 1 root root 64 Jan 27 11:24 35d0421000-35d0422000 -> /usr/lib64/libselinux.so.1 - | lr-------- 1 root root 64 Jan 27 11:24 400000-41a000 -> /usr/bin/ls - -The name of a link represents the virtual memory bounds of a mapping, i.e. -vm_area_struct::vm_start-vm_area_struct::vm_end. - -The main purpose of the map_files is to retrieve a set of memory mapped -files in a fast way instead of parsing /proc//maps or -/proc//smaps, both of which contain many more records. At the same -time one can open(2) mappings from the listings of two processes and -comparing their inode numbers to figure out which anonymous memory areas -are actually shared. - -3.10 /proc//timerslack_ns - Task timerslack value ---------------------------------------------------------- -This file provides the value of the task's timerslack value in nanoseconds. -This value specifies a amount of time that normal timers may be deferred -in order to coalesce timers and avoid unnecessary wakeups. - -This allows a task's interactivity vs power consumption trade off to be -adjusted. - -Writing 0 to the file will set the tasks timerslack to the default value. - -Valid values are from 0 - ULLONG_MAX - -An application setting the value must have PTRACE_MODE_ATTACH_FSCREDS level -permissions on the task specified to change its timerslack_ns value. - -3.11 /proc//patch_state - Livepatch patch operation state ------------------------------------------------------------------ -When CONFIG_LIVEPATCH is enabled, this file displays the value of the -patch state for the task. - -A value of '-1' indicates that no patch is in transition. - -A value of '0' indicates that a patch is in transition and the task is -unpatched. If the patch is being enabled, then the task hasn't been -patched yet. If the patch is being disabled, then the task has already -been unpatched. - -A value of '1' indicates that a patch is in transition and the task is -patched. If the patch is being enabled, then the task has already been -patched. If the patch is being disabled, then the task hasn't been -unpatched yet. - -3.12 /proc//arch_status - task architecture specific status -------------------------------------------------------------------- -When CONFIG_PROC_PID_ARCH_STATUS is enabled, this file displays the -architecture specific status of the task. - -Example -------- - $ cat /proc/6753/arch_status - AVX512_elapsed_ms: 8 - -Description ------------ - -x86 specific entries: ---------------------- - AVX512_elapsed_ms: - ------------------ - If AVX512 is supported on the machine, this entry shows the milliseconds - elapsed since the last time AVX512 usage was recorded. The recording - happens on a best effort basis when a task is scheduled out. This means - that the value depends on two factors: - - 1) The time which the task spent on the CPU without being scheduled - out. With CPU isolation and a single runnable task this can take - several seconds. - - 2) The time since the task was scheduled out last. Depending on the - reason for being scheduled out (time slice exhausted, syscall ...) - this can be arbitrary long time. - - As a consequence the value cannot be considered precise and authoritative - information. The application which uses this information has to be aware - of the overall scenario on the system in order to determine whether a - task is a real AVX512 user or not. Precise information can be obtained - with performance counters. - - A special value of '-1' indicates that no AVX512 usage was recorded, thus - the task is unlikely an AVX512 user, but depends on the workload and the - scheduling scenario, it also could be a false negative mentioned above. - ------------------------------------------------------------------------------- -Configuring procfs ------------------------------------------------------------------------------- - -4.1 Mount options ---------------------- - -The following mount options are supported: - - hidepid= Set /proc// access mode. - gid= Set the group authorized to learn processes information. - -hidepid=0 means classic mode - everybody may access all /proc// directories -(default). - -hidepid=1 means users may not access any /proc// directories but their -own. Sensitive files like cmdline, sched*, status are now protected against -other users. This makes it impossible to learn whether any user runs -specific program (given the program doesn't reveal itself by its behaviour). -As an additional bonus, as /proc//cmdline is unaccessible for other users, -poorly written programs passing sensitive information via program arguments are -now protected against local eavesdroppers. - -hidepid=2 means hidepid=1 plus all /proc// will be fully invisible to other -users. It doesn't mean that it hides a fact whether a process with a specific -pid value exists (it can be learned by other means, e.g. by "kill -0 $PID"), -but it hides process' uid and gid, which may be learned by stat()'ing -/proc// otherwise. It greatly complicates an intruder's task of gathering -information about running processes, whether some daemon runs with elevated -privileges, whether other user runs some sensitive program, whether other users -run any program at all, etc. - -gid= defines a group authorized to learn processes information otherwise -prohibited by hidepid=. If you use some daemon like identd which needs to learn -information about processes information, just add identd to this group. -- cgit v1.2.3 From d5eefa2c5e567751df74d38d5b8cec7ed6e7a08c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:19 +0100 Subject: docs: filesystems: convert qnx6.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/ccd22c1e1426ce4cb30ece9a71c39ebb41844762.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/qnx6.rst | 196 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/qnx6.txt | 174 -------------------------------- 3 files changed, 197 insertions(+), 174 deletions(-) create mode 100644 Documentation/filesystems/qnx6.rst delete mode 100644 Documentation/filesystems/qnx6.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 671906e2fee6..08883a481a76 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -82,5 +82,6 @@ Documentation for filesystem implementations. orangefs overlayfs proc + qnx6 virtiofs vfat diff --git a/Documentation/filesystems/qnx6.rst b/Documentation/filesystems/qnx6.rst new file mode 100644 index 000000000000..b71308314070 --- /dev/null +++ b/Documentation/filesystems/qnx6.rst @@ -0,0 +1,196 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=================== +The QNX6 Filesystem +=================== + +The qnx6fs is used by newer QNX operating system versions. (e.g. Neutrino) +It got introduced in QNX 6.4.0 and is used default since 6.4.1. + +Option +====== + +mmi_fs Mount filesystem as used for example by Audi MMI 3G system + +Specification +============= + +qnx6fs shares many properties with traditional Unix filesystems. It has the +concepts of blocks, inodes and directories. + +On QNX it is possible to create little endian and big endian qnx6 filesystems. +This feature makes it possible to create and use a different endianness fs +for the target (QNX is used on quite a range of embedded systems) platform +running on a different endianness. + +The Linux driver handles endianness transparently. (LE and BE) + +Blocks +------ + +The space in the device or file is split up into blocks. These are a fixed +size of 512, 1024, 2048 or 4096, which is decided when the filesystem is +created. + +Blockpointers are 32bit, so the maximum space that can be addressed is +2^32 * 4096 bytes or 16TB + +The superblocks +--------------- + +The superblock contains all global information about the filesystem. +Each qnx6fs got two superblocks, each one having a 64bit serial number. +That serial number is used to identify the "active" superblock. +In write mode with reach new snapshot (after each synchronous write), the +serial of the new master superblock is increased (old superblock serial + 1) + +So basically the snapshot functionality is realized by an atomic final +update of the serial number. Before updating that serial, all modifications +are done by copying all modified blocks during that specific write request +(or period) and building up a new (stable) filesystem structure under the +inactive superblock. + +Each superblock holds a set of root inodes for the different filesystem +parts. (Inode, Bitmap and Longfilenames) +Each of these root nodes holds information like total size of the stored +data and the addressing levels in that specific tree. +If the level value is 0, up to 16 direct blocks can be addressed by each +node. + +Level 1 adds an additional indirect addressing level where each indirect +addressing block holds up to blocksize / 4 bytes pointers to data blocks. +Level 2 adds an additional indirect addressing block level (so, already up +to 16 * 256 * 256 = 1048576 blocks that can be addressed by such a tree). + +Unused block pointers are always set to ~0 - regardless of root node, +indirect addressing blocks or inodes. + +Data leaves are always on the lowest level. So no data is stored on upper +tree levels. + +The first Superblock is located at 0x2000. (0x2000 is the bootblock size) +The Audi MMI 3G first superblock directly starts at byte 0. + +Second superblock position can either be calculated from the superblock +information (total number of filesystem blocks) or by taking the highest +device address, zeroing the last 3 bytes and then subtracting 0x1000 from +that address. + +0x1000 is the size reserved for each superblock - regardless of the +blocksize of the filesystem. + +Inodes +------ + +Each object in the filesystem is represented by an inode. (index node) +The inode structure contains pointers to the filesystem blocks which contain +the data held in the object and all of the metadata about an object except +its longname. (filenames longer than 27 characters) +The metadata about an object includes the permissions, owner, group, flags, +size, number of blocks used, access time, change time and modification time. + +Object mode field is POSIX format. (which makes things easier) + +There are also pointers to the first 16 blocks, if the object data can be +addressed with 16 direct blocks. + +For more than 16 blocks an indirect addressing in form of another tree is +used. (scheme is the same as the one used for the superblock root nodes) + +The filesize is stored 64bit. Inode counting starts with 1. (while long +filename inodes start with 0) + +Directories +----------- + +A directory is a filesystem object and has an inode just like a file. +It is a specially formatted file containing records which associate each +name with an inode number. + +'.' inode number points to the directory inode + +'..' inode number points to the parent directory inode + +Eeach filename record additionally got a filename length field. + +One special case are long filenames or subdirectory names. + +These got set a filename length field of 0xff in the corresponding directory +record plus the longfile inode number also stored in that record. + +With that longfilename inode number, the longfilename tree can be walked +starting with the superblock longfilename root node pointers. + +Special files +------------- + +Symbolic links are also filesystem objects with inodes. They got a specific +bit in the inode mode field identifying them as symbolic link. + +The directory entry file inode pointer points to the target file inode. + +Hard links got an inode, a directory entry, but a specific mode bit set, +no block pointers and the directory file record pointing to the target file +inode. + +Character and block special devices do not exist in QNX as those files +are handled by the QNX kernel/drivers and created in /dev independent of the +underlaying filesystem. + +Long filenames +-------------- + +Long filenames are stored in a separate addressing tree. The staring point +is the longfilename root node in the active superblock. + +Each data block (tree leaves) holds one long filename. That filename is +limited to 510 bytes. The first two starting bytes are used as length field +for the actual filename. + +If that structure shall fit for all allowed blocksizes, it is clear why there +is a limit of 510 bytes for the actual filename stored. + +Bitmap +------ + +The qnx6fs filesystem allocation bitmap is stored in a tree under bitmap +root node in the superblock and each bit in the bitmap represents one +filesystem block. + +The first block is block 0, which starts 0x1000 after superblock start. +So for a normal qnx6fs 0x3000 (bootblock + superblock) is the physical +address at which block 0 is located. + +Bits at the end of the last bitmap block are set to 1, if the device is +smaller than addressing space in the bitmap. + +Bitmap system area +------------------ + +The bitmap itself is divided into three parts. + +First the system area, that is split into two halves. + +Then userspace. + +The requirement for a static, fixed preallocated system area comes from how +qnx6fs deals with writes. + +Each superblock got it's own half of the system area. So superblock #1 +always uses blocks from the lower half while superblock #2 just writes to +blocks represented by the upper half bitmap system area bits. + +Bitmap blocks, Inode blocks and indirect addressing blocks for those two +tree structures are treated as system blocks. + +The rational behind that is that a write request can work on a new snapshot +(system area of the inactive - resp. lower serial numbered superblock) while +at the same time there is still a complete stable filesystem structer in the +other half of the system area. + +When finished with writing (a sync write is completed, the maximum sync leap +time or a filesystem sync is requested), serial of the previously inactive +superblock atomically is increased and the fs switches over to that - then +stable declared - superblock. + +For all data outside the system area, blocks are just copied while writing. diff --git a/Documentation/filesystems/qnx6.txt b/Documentation/filesystems/qnx6.txt deleted file mode 100644 index 48ea68f15845..000000000000 --- a/Documentation/filesystems/qnx6.txt +++ /dev/null @@ -1,174 +0,0 @@ -The QNX6 Filesystem -=================== - -The qnx6fs is used by newer QNX operating system versions. (e.g. Neutrino) -It got introduced in QNX 6.4.0 and is used default since 6.4.1. - -Option -====== - -mmi_fs Mount filesystem as used for example by Audi MMI 3G system - -Specification -============= - -qnx6fs shares many properties with traditional Unix filesystems. It has the -concepts of blocks, inodes and directories. -On QNX it is possible to create little endian and big endian qnx6 filesystems. -This feature makes it possible to create and use a different endianness fs -for the target (QNX is used on quite a range of embedded systems) platform -running on a different endianness. -The Linux driver handles endianness transparently. (LE and BE) - -Blocks ------- - -The space in the device or file is split up into blocks. These are a fixed -size of 512, 1024, 2048 or 4096, which is decided when the filesystem is -created. -Blockpointers are 32bit, so the maximum space that can be addressed is -2^32 * 4096 bytes or 16TB - -The superblocks ---------------- - -The superblock contains all global information about the filesystem. -Each qnx6fs got two superblocks, each one having a 64bit serial number. -That serial number is used to identify the "active" superblock. -In write mode with reach new snapshot (after each synchronous write), the -serial of the new master superblock is increased (old superblock serial + 1) - -So basically the snapshot functionality is realized by an atomic final -update of the serial number. Before updating that serial, all modifications -are done by copying all modified blocks during that specific write request -(or period) and building up a new (stable) filesystem structure under the -inactive superblock. - -Each superblock holds a set of root inodes for the different filesystem -parts. (Inode, Bitmap and Longfilenames) -Each of these root nodes holds information like total size of the stored -data and the addressing levels in that specific tree. -If the level value is 0, up to 16 direct blocks can be addressed by each -node. -Level 1 adds an additional indirect addressing level where each indirect -addressing block holds up to blocksize / 4 bytes pointers to data blocks. -Level 2 adds an additional indirect addressing block level (so, already up -to 16 * 256 * 256 = 1048576 blocks that can be addressed by such a tree). - -Unused block pointers are always set to ~0 - regardless of root node, -indirect addressing blocks or inodes. -Data leaves are always on the lowest level. So no data is stored on upper -tree levels. - -The first Superblock is located at 0x2000. (0x2000 is the bootblock size) -The Audi MMI 3G first superblock directly starts at byte 0. -Second superblock position can either be calculated from the superblock -information (total number of filesystem blocks) or by taking the highest -device address, zeroing the last 3 bytes and then subtracting 0x1000 from -that address. - -0x1000 is the size reserved for each superblock - regardless of the -blocksize of the filesystem. - -Inodes ------- - -Each object in the filesystem is represented by an inode. (index node) -The inode structure contains pointers to the filesystem blocks which contain -the data held in the object and all of the metadata about an object except -its longname. (filenames longer than 27 characters) -The metadata about an object includes the permissions, owner, group, flags, -size, number of blocks used, access time, change time and modification time. - -Object mode field is POSIX format. (which makes things easier) - -There are also pointers to the first 16 blocks, if the object data can be -addressed with 16 direct blocks. -For more than 16 blocks an indirect addressing in form of another tree is -used. (scheme is the same as the one used for the superblock root nodes) - -The filesize is stored 64bit. Inode counting starts with 1. (while long -filename inodes start with 0) - -Directories ------------ - -A directory is a filesystem object and has an inode just like a file. -It is a specially formatted file containing records which associate each -name with an inode number. -'.' inode number points to the directory inode -'..' inode number points to the parent directory inode -Eeach filename record additionally got a filename length field. - -One special case are long filenames or subdirectory names. -These got set a filename length field of 0xff in the corresponding directory -record plus the longfile inode number also stored in that record. -With that longfilename inode number, the longfilename tree can be walked -starting with the superblock longfilename root node pointers. - -Special files -------------- - -Symbolic links are also filesystem objects with inodes. They got a specific -bit in the inode mode field identifying them as symbolic link. -The directory entry file inode pointer points to the target file inode. - -Hard links got an inode, a directory entry, but a specific mode bit set, -no block pointers and the directory file record pointing to the target file -inode. - -Character and block special devices do not exist in QNX as those files -are handled by the QNX kernel/drivers and created in /dev independent of the -underlaying filesystem. - -Long filenames --------------- - -Long filenames are stored in a separate addressing tree. The staring point -is the longfilename root node in the active superblock. -Each data block (tree leaves) holds one long filename. That filename is -limited to 510 bytes. The first two starting bytes are used as length field -for the actual filename. -If that structure shall fit for all allowed blocksizes, it is clear why there -is a limit of 510 bytes for the actual filename stored. - -Bitmap ------- - -The qnx6fs filesystem allocation bitmap is stored in a tree under bitmap -root node in the superblock and each bit in the bitmap represents one -filesystem block. -The first block is block 0, which starts 0x1000 after superblock start. -So for a normal qnx6fs 0x3000 (bootblock + superblock) is the physical -address at which block 0 is located. - -Bits at the end of the last bitmap block are set to 1, if the device is -smaller than addressing space in the bitmap. - -Bitmap system area ------------------- - -The bitmap itself is divided into three parts. -First the system area, that is split into two halves. -Then userspace. - -The requirement for a static, fixed preallocated system area comes from how -qnx6fs deals with writes. -Each superblock got it's own half of the system area. So superblock #1 -always uses blocks from the lower half while superblock #2 just writes to -blocks represented by the upper half bitmap system area bits. - -Bitmap blocks, Inode blocks and indirect addressing blocks for those two -tree structures are treated as system blocks. - -The rational behind that is that a write request can work on a new snapshot -(system area of the inactive - resp. lower serial numbered superblock) while -at the same time there is still a complete stable filesystem structer in the -other half of the system area. - -When finished with writing (a sync write is completed, the maximum sync leap -time or a filesystem sync is requested), serial of the previously inactive -superblock atomically is increased and the fs switches over to that - then -stable declared - superblock. - -For all data outside the system area, blocks are just copied while writing. -- cgit v1.2.3 From 8979fc9a282441d086ead589528c711d9df3d94a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:20 +0100 Subject: docs: filesystems: convert ramfs-rootfs-initramfs.txt to ReST - Add a SPDX header; - Add a document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Use notes markups; - Add lists markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/89cbcc99a6371f3bff3ea1668fe497e8a15c226b.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + .../filesystems/ramfs-rootfs-initramfs.rst | 369 +++++++++++++++++++++ .../filesystems/ramfs-rootfs-initramfs.txt | 359 -------------------- 3 files changed, 370 insertions(+), 359 deletions(-) create mode 100644 Documentation/filesystems/ramfs-rootfs-initramfs.rst delete mode 100644 Documentation/filesystems/ramfs-rootfs-initramfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 08883a481a76..b8689d082911 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -83,5 +83,6 @@ Documentation for filesystem implementations. overlayfs proc qnx6 + ramfs-rootfs-initramfs virtiofs vfat diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.rst b/Documentation/filesystems/ramfs-rootfs-initramfs.rst new file mode 100644 index 000000000000..6c576e241d86 --- /dev/null +++ b/Documentation/filesystems/ramfs-rootfs-initramfs.rst @@ -0,0 +1,369 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +Ramfs, rootfs and initramfs +=========================== + +October 17, 2005 + +Rob Landley +============================= + +What is ramfs? +-------------- + +Ramfs is a very simple filesystem that exports Linux's disk caching +mechanisms (the page cache and dentry cache) as a dynamically resizable +RAM-based filesystem. + +Normally all files are cached in memory by Linux. Pages of data read from +backing store (usually the block device the filesystem is mounted on) are kept +around in case it's needed again, but marked as clean (freeable) in case the +Virtual Memory system needs the memory for something else. Similarly, data +written to files is marked clean as soon as it has been written to backing +store, but kept around for caching purposes until the VM reallocates the +memory. A similar mechanism (the dentry cache) greatly speeds up access to +directories. + +With ramfs, there is no backing store. Files written into ramfs allocate +dentries and page cache as usual, but there's nowhere to write them to. +This means the pages are never marked clean, so they can't be freed by the +VM when it's looking to recycle memory. + +The amount of code required to implement ramfs is tiny, because all the +work is done by the existing Linux caching infrastructure. Basically, +you're mounting the disk cache as a filesystem. Because of this, ramfs is not +an optional component removable via menuconfig, since there would be negligible +space savings. + +ramfs and ramdisk: +------------------ + +The older "ram disk" mechanism created a synthetic block device out of +an area of RAM and used it as backing store for a filesystem. This block +device was of fixed size, so the filesystem mounted on it was of fixed +size. Using a ram disk also required unnecessarily copying memory from the +fake block device into the page cache (and copying changes back out), as well +as creating and destroying dentries. Plus it needed a filesystem driver +(such as ext2) to format and interpret this data. + +Compared to ramfs, this wastes memory (and memory bus bandwidth), creates +unnecessary work for the CPU, and pollutes the CPU caches. (There are tricks +to avoid this copying by playing with the page tables, but they're unpleasantly +complicated and turn out to be about as expensive as the copying anyway.) +More to the point, all the work ramfs is doing has to happen _anyway_, +since all file access goes through the page and dentry caches. The RAM +disk is simply unnecessary; ramfs is internally much simpler. + +Another reason ramdisks are semi-obsolete is that the introduction of +loopback devices offered a more flexible and convenient way to create +synthetic block devices, now from files instead of from chunks of memory. +See losetup (8) for details. + +ramfs and tmpfs: +---------------- + +One downside of ramfs is you can keep writing data into it until you fill +up all memory, and the VM can't free it because the VM thinks that files +should get written to backing store (rather than swap space), but ramfs hasn't +got any backing store. Because of this, only root (or a trusted user) should +be allowed write access to a ramfs mount. + +A ramfs derivative called tmpfs was created to add size limits, and the ability +to write the data to swap space. Normal users can be allowed write access to +tmpfs mounts. See Documentation/filesystems/tmpfs.txt for more information. + +What is rootfs? +--------------- + +Rootfs is a special instance of ramfs (or tmpfs, if that's enabled), which is +always present in 2.6 systems. You can't unmount rootfs for approximately the +same reason you can't kill the init process; rather than having special code +to check for and handle an empty list, it's smaller and simpler for the kernel +to just make sure certain lists can't become empty. + +Most systems just mount another filesystem over rootfs and ignore it. The +amount of space an empty instance of ramfs takes up is tiny. + +If CONFIG_TMPFS is enabled, rootfs will use tmpfs instead of ramfs by +default. To force ramfs, add "rootfstype=ramfs" to the kernel command +line. + +What is initramfs? +------------------ + +All 2.6 Linux kernels contain a gzipped "cpio" format archive, which is +extracted into rootfs when the kernel boots up. After extracting, the kernel +checks to see if rootfs contains a file "init", and if so it executes it as PID +1. If found, this init process is responsible for bringing the system the +rest of the way up, including locating and mounting the real root device (if +any). If rootfs does not contain an init program after the embedded cpio +archive is extracted into it, the kernel will fall through to the older code +to locate and mount a root partition, then exec some variant of /sbin/init +out of that. + +All this differs from the old initrd in several ways: + + - The old initrd was always a separate file, while the initramfs archive is + linked into the linux kernel image. (The directory ``linux-*/usr`` is + devoted to generating this archive during the build.) + + - The old initrd file was a gzipped filesystem image (in some file format, + such as ext2, that needed a driver built into the kernel), while the new + initramfs archive is a gzipped cpio archive (like tar only simpler, + see cpio(1) and Documentation/driver-api/early-userspace/buffer-format.rst). + The kernel's cpio extraction code is not only extremely small, it's also + __init text and data that can be discarded during the boot process. + + - The program run by the old initrd (which was called /initrd, not /init) did + some setup and then returned to the kernel, while the init program from + initramfs is not expected to return to the kernel. (If /init needs to hand + off control it can overmount / with a new root device and exec another init + program. See the switch_root utility, below.) + + - When switching another root device, initrd would pivot_root and then + umount the ramdisk. But initramfs is rootfs: you can neither pivot_root + rootfs, nor unmount it. Instead delete everything out of rootfs to + free up the space (find -xdev / -exec rm '{}' ';'), overmount rootfs + with the new root (cd /newmount; mount --move . /; chroot .), attach + stdin/stdout/stderr to the new /dev/console, and exec the new init. + + Since this is a remarkably persnickety process (and involves deleting + commands before you can run them), the klibc package introduced a helper + program (utils/run_init.c) to do all this for you. Most other packages + (such as busybox) have named this command "switch_root". + +Populating initramfs: +--------------------- + +The 2.6 kernel build process always creates a gzipped cpio format initramfs +archive and links it into the resulting kernel binary. By default, this +archive is empty (consuming 134 bytes on x86). + +The config option CONFIG_INITRAMFS_SOURCE (in General Setup in menuconfig, +and living in usr/Kconfig) can be used to specify a source for the +initramfs archive, which will automatically be incorporated into the +resulting binary. This option can point to an existing gzipped cpio +archive, a directory containing files to be archived, or a text file +specification such as the following example:: + + dir /dev 755 0 0 + nod /dev/console 644 0 0 c 5 1 + nod /dev/loop0 644 0 0 b 7 0 + dir /bin 755 1000 1000 + slink /bin/sh busybox 777 0 0 + file /bin/busybox initramfs/busybox 755 0 0 + dir /proc 755 0 0 + dir /sys 755 0 0 + dir /mnt 755 0 0 + file /init initramfs/init.sh 755 0 0 + +Run "usr/gen_init_cpio" (after the kernel build) to get a usage message +documenting the above file format. + +One advantage of the configuration file is that root access is not required to +set permissions or create device nodes in the new archive. (Note that those +two example "file" entries expect to find files named "init.sh" and "busybox" in +a directory called "initramfs", under the linux-2.6.* directory. See +Documentation/driver-api/early-userspace/early_userspace_support.rst for more details.) + +The kernel does not depend on external cpio tools. If you specify a +directory instead of a configuration file, the kernel's build infrastructure +creates a configuration file from that directory (usr/Makefile calls +usr/gen_initramfs_list.sh), and proceeds to package up that directory +using the config file (by feeding it to usr/gen_init_cpio, which is created +from usr/gen_init_cpio.c). The kernel's build-time cpio creation code is +entirely self-contained, and the kernel's boot-time extractor is also +(obviously) self-contained. + +The one thing you might need external cpio utilities installed for is creating +or extracting your own preprepared cpio files to feed to the kernel build +(instead of a config file or directory). + +The following command line can extract a cpio image (either by the above script +or by the kernel build) back into its component files:: + + cpio -i -d -H newc -F initramfs_data.cpio --no-absolute-filenames + +The following shell script can create a prebuilt cpio archive you can +use in place of the above config file:: + + #!/bin/sh + + # Copyright 2006 Rob Landley and TimeSys Corporation. + # Licensed under GPL version 2 + + if [ $# -ne 2 ] + then + echo "usage: mkinitramfs directory imagename.cpio.gz" + exit 1 + fi + + if [ -d "$1" ] + then + echo "creating $2 from $1" + (cd "$1"; find . | cpio -o -H newc | gzip) > "$2" + else + echo "First argument must be a directory" + exit 1 + fi + +.. Note:: + + The cpio man page contains some bad advice that will break your initramfs + archive if you follow it. It says "A typical way to generate the list + of filenames is with the find command; you should give find the -depth + option to minimize problems with permissions on directories that are + unwritable or not searchable." Don't do this when creating + initramfs.cpio.gz images, it won't work. The Linux kernel cpio extractor + won't create files in a directory that doesn't exist, so the directory + entries must go before the files that go in those directories. + The above script gets them in the right order. + +External initramfs images: +-------------------------- + +If the kernel has initrd support enabled, an external cpio.gz archive can also +be passed into a 2.6 kernel in place of an initrd. In this case, the kernel +will autodetect the type (initramfs, not initrd) and extract the external cpio +archive into rootfs before trying to run /init. + +This has the memory efficiency advantages of initramfs (no ramdisk block +device) but the separate packaging of initrd (which is nice if you have +non-GPL code you'd like to run from initramfs, without conflating it with +the GPL licensed Linux kernel binary). + +It can also be used to supplement the kernel's built-in initramfs image. The +files in the external archive will overwrite any conflicting files in +the built-in initramfs archive. Some distributors also prefer to customize +a single kernel image with task-specific initramfs images, without recompiling. + +Contents of initramfs: +---------------------- + +An initramfs archive is a complete self-contained root filesystem for Linux. +If you don't already understand what shared libraries, devices, and paths +you need to get a minimal root filesystem up and running, here are some +references: + +- http://www.tldp.org/HOWTO/Bootdisk-HOWTO/ +- http://www.tldp.org/HOWTO/From-PowerUp-To-Bash-Prompt-HOWTO.html +- http://www.linuxfromscratch.org/lfs/view/stable/ + +The "klibc" package (http://www.kernel.org/pub/linux/libs/klibc) is +designed to be a tiny C library to statically link early userspace +code against, along with some related utilities. It is BSD licensed. + +I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net) +myself. These are LGPL and GPL, respectively. (A self-contained initramfs +package is planned for the busybox 1.3 release.) + +In theory you could use glibc, but that's not well suited for small embedded +uses like this. (A "hello world" program statically linked against glibc is +over 400k. With uClibc it's 7k. Also note that glibc dlopens libnss to do +name lookups, even when otherwise statically linked.) + +A good first step is to get initramfs to run a statically linked "hello world" +program as init, and test it under an emulator like qemu (www.qemu.org) or +User Mode Linux, like so:: + + cat > hello.c << EOF + #include + #include + + int main(int argc, char *argv[]) + { + printf("Hello world!\n"); + sleep(999999999); + } + EOF + gcc -static hello.c -o init + echo init | cpio -o -H newc | gzip > test.cpio.gz + # Testing external initramfs using the initrd loading mechanism. + qemu -kernel /boot/vmlinuz -initrd test.cpio.gz /dev/zero + +When debugging a normal root filesystem, it's nice to be able to boot with +"init=/bin/sh". The initramfs equivalent is "rdinit=/bin/sh", and it's +just as useful. + +Why cpio rather than tar? +------------------------- + +This decision was made back in December, 2001. The discussion started here: + + http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1538.html + +And spawned a second thread (specifically on tar vs cpio), starting here: + + http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1587.html + +The quick and dirty summary version (which is no substitute for reading +the above threads) is: + +1) cpio is a standard. It's decades old (from the AT&T days), and already + widely used on Linux (inside RPM, Red Hat's device driver disks). Here's + a Linux Journal article about it from 1996: + + http://www.linuxjournal.com/article/1213 + + It's not as popular as tar because the traditional cpio command line tools + require _truly_hideous_ command line arguments. But that says nothing + either way about the archive format, and there are alternative tools, + such as: + + http://freecode.com/projects/afio + +2) The cpio archive format chosen by the kernel is simpler and cleaner (and + thus easier to create and parse) than any of the (literally dozens of) + various tar archive formats. The complete initramfs archive format is + explained in buffer-format.txt, created in usr/gen_init_cpio.c, and + extracted in init/initramfs.c. All three together come to less than 26k + total of human-readable text. + +3) The GNU project standardizing on tar is approximately as relevant as + Windows standardizing on zip. Linux is not part of either, and is free + to make its own technical decisions. + +4) Since this is a kernel internal format, it could easily have been + something brand new. The kernel provides its own tools to create and + extract this format anyway. Using an existing standard was preferable, + but not essential. + +5) Al Viro made the decision (quote: "tar is ugly as hell and not going to be + supported on the kernel side"): + + http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1540.html + + explained his reasoning: + + - http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1550.html + - http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1638.html + + and, most importantly, designed and implemented the initramfs code. + +Future directions: +------------------ + +Today (2.6.16), initramfs is always compiled in, but not always used. The +kernel falls back to legacy boot code that is reached only if initramfs does +not contain an /init program. The fallback is legacy code, there to ensure a +smooth transition and allowing early boot functionality to gradually move to +"early userspace" (I.E. initramfs). + +The move to early userspace is necessary because finding and mounting the real +root device is complex. Root partitions can span multiple devices (raid or +separate journal). They can be out on the network (requiring dhcp, setting a +specific MAC address, logging into a server, etc). They can live on removable +media, with dynamically allocated major/minor numbers and persistent naming +issues requiring a full udev implementation to sort out. They can be +compressed, encrypted, copy-on-write, loopback mounted, strangely partitioned, +and so on. + +This kind of complexity (which inevitably includes policy) is rightly handled +in userspace. Both klibc and busybox/uClibc are working on simple initramfs +packages to drop into a kernel build. + +The klibc package has now been accepted into Andrew Morton's 2.6.17-mm tree. +The kernel's current early boot code (partition detection, etc) will probably +be migrated into a default initramfs, automatically created and used by the +kernel build. diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.txt deleted file mode 100644 index 97d42ccaa92d..000000000000 --- a/Documentation/filesystems/ramfs-rootfs-initramfs.txt +++ /dev/null @@ -1,359 +0,0 @@ -ramfs, rootfs and initramfs -October 17, 2005 -Rob Landley -============================= - -What is ramfs? --------------- - -Ramfs is a very simple filesystem that exports Linux's disk caching -mechanisms (the page cache and dentry cache) as a dynamically resizable -RAM-based filesystem. - -Normally all files are cached in memory by Linux. Pages of data read from -backing store (usually the block device the filesystem is mounted on) are kept -around in case it's needed again, but marked as clean (freeable) in case the -Virtual Memory system needs the memory for something else. Similarly, data -written to files is marked clean as soon as it has been written to backing -store, but kept around for caching purposes until the VM reallocates the -memory. A similar mechanism (the dentry cache) greatly speeds up access to -directories. - -With ramfs, there is no backing store. Files written into ramfs allocate -dentries and page cache as usual, but there's nowhere to write them to. -This means the pages are never marked clean, so they can't be freed by the -VM when it's looking to recycle memory. - -The amount of code required to implement ramfs is tiny, because all the -work is done by the existing Linux caching infrastructure. Basically, -you're mounting the disk cache as a filesystem. Because of this, ramfs is not -an optional component removable via menuconfig, since there would be negligible -space savings. - -ramfs and ramdisk: ------------------- - -The older "ram disk" mechanism created a synthetic block device out of -an area of RAM and used it as backing store for a filesystem. This block -device was of fixed size, so the filesystem mounted on it was of fixed -size. Using a ram disk also required unnecessarily copying memory from the -fake block device into the page cache (and copying changes back out), as well -as creating and destroying dentries. Plus it needed a filesystem driver -(such as ext2) to format and interpret this data. - -Compared to ramfs, this wastes memory (and memory bus bandwidth), creates -unnecessary work for the CPU, and pollutes the CPU caches. (There are tricks -to avoid this copying by playing with the page tables, but they're unpleasantly -complicated and turn out to be about as expensive as the copying anyway.) -More to the point, all the work ramfs is doing has to happen _anyway_, -since all file access goes through the page and dentry caches. The RAM -disk is simply unnecessary; ramfs is internally much simpler. - -Another reason ramdisks are semi-obsolete is that the introduction of -loopback devices offered a more flexible and convenient way to create -synthetic block devices, now from files instead of from chunks of memory. -See losetup (8) for details. - -ramfs and tmpfs: ----------------- - -One downside of ramfs is you can keep writing data into it until you fill -up all memory, and the VM can't free it because the VM thinks that files -should get written to backing store (rather than swap space), but ramfs hasn't -got any backing store. Because of this, only root (or a trusted user) should -be allowed write access to a ramfs mount. - -A ramfs derivative called tmpfs was created to add size limits, and the ability -to write the data to swap space. Normal users can be allowed write access to -tmpfs mounts. See Documentation/filesystems/tmpfs.txt for more information. - -What is rootfs? ---------------- - -Rootfs is a special instance of ramfs (or tmpfs, if that's enabled), which is -always present in 2.6 systems. You can't unmount rootfs for approximately the -same reason you can't kill the init process; rather than having special code -to check for and handle an empty list, it's smaller and simpler for the kernel -to just make sure certain lists can't become empty. - -Most systems just mount another filesystem over rootfs and ignore it. The -amount of space an empty instance of ramfs takes up is tiny. - -If CONFIG_TMPFS is enabled, rootfs will use tmpfs instead of ramfs by -default. To force ramfs, add "rootfstype=ramfs" to the kernel command -line. - -What is initramfs? ------------------- - -All 2.6 Linux kernels contain a gzipped "cpio" format archive, which is -extracted into rootfs when the kernel boots up. After extracting, the kernel -checks to see if rootfs contains a file "init", and if so it executes it as PID -1. If found, this init process is responsible for bringing the system the -rest of the way up, including locating and mounting the real root device (if -any). If rootfs does not contain an init program after the embedded cpio -archive is extracted into it, the kernel will fall through to the older code -to locate and mount a root partition, then exec some variant of /sbin/init -out of that. - -All this differs from the old initrd in several ways: - - - The old initrd was always a separate file, while the initramfs archive is - linked into the linux kernel image. (The directory linux-*/usr is devoted - to generating this archive during the build.) - - - The old initrd file was a gzipped filesystem image (in some file format, - such as ext2, that needed a driver built into the kernel), while the new - initramfs archive is a gzipped cpio archive (like tar only simpler, - see cpio(1) and Documentation/driver-api/early-userspace/buffer-format.rst). The - kernel's cpio extraction code is not only extremely small, it's also - __init text and data that can be discarded during the boot process. - - - The program run by the old initrd (which was called /initrd, not /init) did - some setup and then returned to the kernel, while the init program from - initramfs is not expected to return to the kernel. (If /init needs to hand - off control it can overmount / with a new root device and exec another init - program. See the switch_root utility, below.) - - - When switching another root device, initrd would pivot_root and then - umount the ramdisk. But initramfs is rootfs: you can neither pivot_root - rootfs, nor unmount it. Instead delete everything out of rootfs to - free up the space (find -xdev / -exec rm '{}' ';'), overmount rootfs - with the new root (cd /newmount; mount --move . /; chroot .), attach - stdin/stdout/stderr to the new /dev/console, and exec the new init. - - Since this is a remarkably persnickety process (and involves deleting - commands before you can run them), the klibc package introduced a helper - program (utils/run_init.c) to do all this for you. Most other packages - (such as busybox) have named this command "switch_root". - -Populating initramfs: ---------------------- - -The 2.6 kernel build process always creates a gzipped cpio format initramfs -archive and links it into the resulting kernel binary. By default, this -archive is empty (consuming 134 bytes on x86). - -The config option CONFIG_INITRAMFS_SOURCE (in General Setup in menuconfig, -and living in usr/Kconfig) can be used to specify a source for the -initramfs archive, which will automatically be incorporated into the -resulting binary. This option can point to an existing gzipped cpio -archive, a directory containing files to be archived, or a text file -specification such as the following example: - - dir /dev 755 0 0 - nod /dev/console 644 0 0 c 5 1 - nod /dev/loop0 644 0 0 b 7 0 - dir /bin 755 1000 1000 - slink /bin/sh busybox 777 0 0 - file /bin/busybox initramfs/busybox 755 0 0 - dir /proc 755 0 0 - dir /sys 755 0 0 - dir /mnt 755 0 0 - file /init initramfs/init.sh 755 0 0 - -Run "usr/gen_init_cpio" (after the kernel build) to get a usage message -documenting the above file format. - -One advantage of the configuration file is that root access is not required to -set permissions or create device nodes in the new archive. (Note that those -two example "file" entries expect to find files named "init.sh" and "busybox" in -a directory called "initramfs", under the linux-2.6.* directory. See -Documentation/driver-api/early-userspace/early_userspace_support.rst for more details.) - -The kernel does not depend on external cpio tools. If you specify a -directory instead of a configuration file, the kernel's build infrastructure -creates a configuration file from that directory (usr/Makefile calls -usr/gen_initramfs_list.sh), and proceeds to package up that directory -using the config file (by feeding it to usr/gen_init_cpio, which is created -from usr/gen_init_cpio.c). The kernel's build-time cpio creation code is -entirely self-contained, and the kernel's boot-time extractor is also -(obviously) self-contained. - -The one thing you might need external cpio utilities installed for is creating -or extracting your own preprepared cpio files to feed to the kernel build -(instead of a config file or directory). - -The following command line can extract a cpio image (either by the above script -or by the kernel build) back into its component files: - - cpio -i -d -H newc -F initramfs_data.cpio --no-absolute-filenames - -The following shell script can create a prebuilt cpio archive you can -use in place of the above config file: - - #!/bin/sh - - # Copyright 2006 Rob Landley and TimeSys Corporation. - # Licensed under GPL version 2 - - if [ $# -ne 2 ] - then - echo "usage: mkinitramfs directory imagename.cpio.gz" - exit 1 - fi - - if [ -d "$1" ] - then - echo "creating $2 from $1" - (cd "$1"; find . | cpio -o -H newc | gzip) > "$2" - else - echo "First argument must be a directory" - exit 1 - fi - -Note: The cpio man page contains some bad advice that will break your initramfs -archive if you follow it. It says "A typical way to generate the list -of filenames is with the find command; you should give find the -depth option -to minimize problems with permissions on directories that are unwritable or not -searchable." Don't do this when creating initramfs.cpio.gz images, it won't -work. The Linux kernel cpio extractor won't create files in a directory that -doesn't exist, so the directory entries must go before the files that go in -those directories. The above script gets them in the right order. - -External initramfs images: --------------------------- - -If the kernel has initrd support enabled, an external cpio.gz archive can also -be passed into a 2.6 kernel in place of an initrd. In this case, the kernel -will autodetect the type (initramfs, not initrd) and extract the external cpio -archive into rootfs before trying to run /init. - -This has the memory efficiency advantages of initramfs (no ramdisk block -device) but the separate packaging of initrd (which is nice if you have -non-GPL code you'd like to run from initramfs, without conflating it with -the GPL licensed Linux kernel binary). - -It can also be used to supplement the kernel's built-in initramfs image. The -files in the external archive will overwrite any conflicting files in -the built-in initramfs archive. Some distributors also prefer to customize -a single kernel image with task-specific initramfs images, without recompiling. - -Contents of initramfs: ----------------------- - -An initramfs archive is a complete self-contained root filesystem for Linux. -If you don't already understand what shared libraries, devices, and paths -you need to get a minimal root filesystem up and running, here are some -references: -http://www.tldp.org/HOWTO/Bootdisk-HOWTO/ -http://www.tldp.org/HOWTO/From-PowerUp-To-Bash-Prompt-HOWTO.html -http://www.linuxfromscratch.org/lfs/view/stable/ - -The "klibc" package (http://www.kernel.org/pub/linux/libs/klibc) is -designed to be a tiny C library to statically link early userspace -code against, along with some related utilities. It is BSD licensed. - -I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net) -myself. These are LGPL and GPL, respectively. (A self-contained initramfs -package is planned for the busybox 1.3 release.) - -In theory you could use glibc, but that's not well suited for small embedded -uses like this. (A "hello world" program statically linked against glibc is -over 400k. With uClibc it's 7k. Also note that glibc dlopens libnss to do -name lookups, even when otherwise statically linked.) - -A good first step is to get initramfs to run a statically linked "hello world" -program as init, and test it under an emulator like qemu (www.qemu.org) or -User Mode Linux, like so: - - cat > hello.c << EOF - #include - #include - - int main(int argc, char *argv[]) - { - printf("Hello world!\n"); - sleep(999999999); - } - EOF - gcc -static hello.c -o init - echo init | cpio -o -H newc | gzip > test.cpio.gz - # Testing external initramfs using the initrd loading mechanism. - qemu -kernel /boot/vmlinuz -initrd test.cpio.gz /dev/zero - -When debugging a normal root filesystem, it's nice to be able to boot with -"init=/bin/sh". The initramfs equivalent is "rdinit=/bin/sh", and it's -just as useful. - -Why cpio rather than tar? -------------------------- - -This decision was made back in December, 2001. The discussion started here: - - http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1538.html - -And spawned a second thread (specifically on tar vs cpio), starting here: - - http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1587.html - -The quick and dirty summary version (which is no substitute for reading -the above threads) is: - -1) cpio is a standard. It's decades old (from the AT&T days), and already - widely used on Linux (inside RPM, Red Hat's device driver disks). Here's - a Linux Journal article about it from 1996: - - http://www.linuxjournal.com/article/1213 - - It's not as popular as tar because the traditional cpio command line tools - require _truly_hideous_ command line arguments. But that says nothing - either way about the archive format, and there are alternative tools, - such as: - - http://freecode.com/projects/afio - -2) The cpio archive format chosen by the kernel is simpler and cleaner (and - thus easier to create and parse) than any of the (literally dozens of) - various tar archive formats. The complete initramfs archive format is - explained in buffer-format.txt, created in usr/gen_init_cpio.c, and - extracted in init/initramfs.c. All three together come to less than 26k - total of human-readable text. - -3) The GNU project standardizing on tar is approximately as relevant as - Windows standardizing on zip. Linux is not part of either, and is free - to make its own technical decisions. - -4) Since this is a kernel internal format, it could easily have been - something brand new. The kernel provides its own tools to create and - extract this format anyway. Using an existing standard was preferable, - but not essential. - -5) Al Viro made the decision (quote: "tar is ugly as hell and not going to be - supported on the kernel side"): - - http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1540.html - - explained his reasoning: - - http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1550.html - http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1638.html - - and, most importantly, designed and implemented the initramfs code. - -Future directions: ------------------- - -Today (2.6.16), initramfs is always compiled in, but not always used. The -kernel falls back to legacy boot code that is reached only if initramfs does -not contain an /init program. The fallback is legacy code, there to ensure a -smooth transition and allowing early boot functionality to gradually move to -"early userspace" (I.E. initramfs). - -The move to early userspace is necessary because finding and mounting the real -root device is complex. Root partitions can span multiple devices (raid or -separate journal). They can be out on the network (requiring dhcp, setting a -specific MAC address, logging into a server, etc). They can live on removable -media, with dynamically allocated major/minor numbers and persistent naming -issues requiring a full udev implementation to sort out. They can be -compressed, encrypted, copy-on-write, loopback mounted, strangely partitioned, -and so on. - -This kind of complexity (which inevitably includes policy) is rightly handled -in userspace. Both klibc and busybox/uClibc are working on simple initramfs -packages to drop into a kernel build. - -The klibc package has now been accepted into Andrew Morton's 2.6.17-mm tree. -The kernel's current early boot code (partition detection, etc) will probably -be migrated into a default initramfs, automatically created and used by the -kernel build. -- cgit v1.2.3 From 56e6d5c0eb7b862b4c984107e665821722413008 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:21 +0100 Subject: docs: filesystems: convert relay.txt to ReST - Add a SPDX header; - Adjust document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Use notes markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/f48bb0fdf64d197f28c6f469adb61a7a091adb75.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/relay.rst | 501 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/relay.txt | 494 ----------------------------------- 3 files changed, 502 insertions(+), 494 deletions(-) create mode 100644 Documentation/filesystems/relay.rst delete mode 100644 Documentation/filesystems/relay.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index b8689d082911..0aade8146d4d 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -84,5 +84,6 @@ Documentation for filesystem implementations. proc qnx6 ramfs-rootfs-initramfs + relay virtiofs vfat diff --git a/Documentation/filesystems/relay.rst b/Documentation/filesystems/relay.rst new file mode 100644 index 000000000000..04ad083cfe62 --- /dev/null +++ b/Documentation/filesystems/relay.rst @@ -0,0 +1,501 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================== +relay interface (formerly relayfs) +================================== + +The relay interface provides a means for kernel applications to +efficiently log and transfer large quantities of data from the kernel +to userspace via user-defined 'relay channels'. + +A 'relay channel' is a kernel->user data relay mechanism implemented +as a set of per-cpu kernel buffers ('channel buffers'), each +represented as a regular file ('relay file') in user space. Kernel +clients write into the channel buffers using efficient write +functions; these automatically log into the current cpu's channel +buffer. User space applications mmap() or read() from the relay files +and retrieve the data as it becomes available. The relay files +themselves are files created in a host filesystem, e.g. debugfs, and +are associated with the channel buffers using the API described below. + +The format of the data logged into the channel buffers is completely +up to the kernel client; the relay interface does however provide +hooks which allow kernel clients to impose some structure on the +buffer data. The relay interface doesn't implement any form of data +filtering - this also is left to the kernel client. The purpose is to +keep things as simple as possible. + +This document provides an overview of the relay interface API. The +details of the function parameters are documented along with the +functions in the relay interface code - please see that for details. + +Semantics +========= + +Each relay channel has one buffer per CPU, each buffer has one or more +sub-buffers. Messages are written to the first sub-buffer until it is +too full to contain a new message, in which case it is written to +the next (if available). Messages are never split across sub-buffers. +At this point, userspace can be notified so it empties the first +sub-buffer, while the kernel continues writing to the next. + +When notified that a sub-buffer is full, the kernel knows how many +bytes of it are padding i.e. unused space occurring because a complete +message couldn't fit into a sub-buffer. Userspace can use this +knowledge to copy only valid data. + +After copying it, userspace can notify the kernel that a sub-buffer +has been consumed. + +A relay channel can operate in a mode where it will overwrite data not +yet collected by userspace, and not wait for it to be consumed. + +The relay channel itself does not provide for communication of such +data between userspace and kernel, allowing the kernel side to remain +simple and not impose a single interface on userspace. It does +provide a set of examples and a separate helper though, described +below. + +The read() interface both removes padding and internally consumes the +read sub-buffers; thus in cases where read(2) is being used to drain +the channel buffers, special-purpose communication between kernel and +user isn't necessary for basic operation. + +One of the major goals of the relay interface is to provide a low +overhead mechanism for conveying kernel data to userspace. While the +read() interface is easy to use, it's not as efficient as the mmap() +approach; the example code attempts to make the tradeoff between the +two approaches as small as possible. + +klog and relay-apps example code +================================ + +The relay interface itself is ready to use, but to make things easier, +a couple simple utility functions and a set of examples are provided. + +The relay-apps example tarball, available on the relay sourceforge +site, contains a set of self-contained examples, each consisting of a +pair of .c files containing boilerplate code for each of the user and +kernel sides of a relay application. When combined these two sets of +boilerplate code provide glue to easily stream data to disk, without +having to bother with mundane housekeeping chores. + +The 'klog debugging functions' patch (klog.patch in the relay-apps +tarball) provides a couple of high-level logging functions to the +kernel which allow writing formatted text or raw data to a channel, +regardless of whether a channel to write into exists or not, or even +whether the relay interface is compiled into the kernel or not. These +functions allow you to put unconditional 'trace' statements anywhere +in the kernel or kernel modules; only when there is a 'klog handler' +registered will data actually be logged (see the klog and kleak +examples for details). + +It is of course possible to use the relay interface from scratch, +i.e. without using any of the relay-apps example code or klog, but +you'll have to implement communication between userspace and kernel, +allowing both to convey the state of buffers (full, empty, amount of +padding). The read() interface both removes padding and internally +consumes the read sub-buffers; thus in cases where read(2) is being +used to drain the channel buffers, special-purpose communication +between kernel and user isn't necessary for basic operation. Things +such as buffer-full conditions would still need to be communicated via +some channel though. + +klog and the relay-apps examples can be found in the relay-apps +tarball on http://relayfs.sourceforge.net + +The relay interface user space API +================================== + +The relay interface implements basic file operations for user space +access to relay channel buffer data. Here are the file operations +that are available and some comments regarding their behavior: + +=========== ============================================================ +open() enables user to open an _existing_ channel buffer. + +mmap() results in channel buffer being mapped into the caller's + memory space. Note that you can't do a partial mmap - you + must map the entire file, which is NRBUF * SUBBUFSIZE. + +read() read the contents of a channel buffer. The bytes read are + 'consumed' by the reader, i.e. they won't be available + again to subsequent reads. If the channel is being used + in no-overwrite mode (the default), it can be read at any + time even if there's an active kernel writer. If the + channel is being used in overwrite mode and there are + active channel writers, results may be unpredictable - + users should make sure that all logging to the channel has + ended before using read() with overwrite mode. Sub-buffer + padding is automatically removed and will not be seen by + the reader. + +sendfile() transfer data from a channel buffer to an output file + descriptor. Sub-buffer padding is automatically removed + and will not be seen by the reader. + +poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are + notified when sub-buffer boundaries are crossed. + +close() decrements the channel buffer's refcount. When the refcount + reaches 0, i.e. when no process or kernel client has the + buffer open, the channel buffer is freed. +=========== ============================================================ + +In order for a user application to make use of relay files, the +host filesystem must be mounted. For example:: + + mount -t debugfs debugfs /sys/kernel/debug + +.. Note:: + + the host filesystem doesn't need to be mounted for kernel + clients to create or use channels - it only needs to be + mounted when user space applications need access to the buffer + data. + + +The relay interface kernel API +============================== + +Here's a summary of the API the relay interface provides to in-kernel clients: + +TBD(curr. line MT:/API/) + channel management functions:: + + relay_open(base_filename, parent, subbuf_size, n_subbufs, + callbacks, private_data) + relay_close(chan) + relay_flush(chan) + relay_reset(chan) + + channel management typically called on instigation of userspace:: + + relay_subbufs_consumed(chan, cpu, subbufs_consumed) + + write functions:: + + relay_write(chan, data, length) + __relay_write(chan, data, length) + relay_reserve(chan, length) + + callbacks:: + + subbuf_start(buf, subbuf, prev_subbuf, prev_padding) + buf_mapped(buf, filp) + buf_unmapped(buf, filp) + create_buf_file(filename, parent, mode, buf, is_global) + remove_buf_file(dentry) + + helper functions:: + + relay_buf_full(buf) + subbuf_start_reserve(buf, length) + + +Creating a channel +------------------ + +relay_open() is used to create a channel, along with its per-cpu +channel buffers. Each channel buffer will have an associated file +created for it in the host filesystem, which can be and mmapped or +read from in user space. The files are named basename0...basenameN-1 +where N is the number of online cpus, and by default will be created +in the root of the filesystem (if the parent param is NULL). If you +want a directory structure to contain your relay files, you should +create it using the host filesystem's directory creation function, +e.g. debugfs_create_dir(), and pass the parent directory to +relay_open(). Users are responsible for cleaning up any directory +structure they create, when the channel is closed - again the host +filesystem's directory removal functions should be used for that, +e.g. debugfs_remove(). + +In order for a channel to be created and the host filesystem's files +associated with its channel buffers, the user must provide definitions +for two callback functions, create_buf_file() and remove_buf_file(). +create_buf_file() is called once for each per-cpu buffer from +relay_open() and allows the user to create the file which will be used +to represent the corresponding channel buffer. The callback should +return the dentry of the file created to represent the channel buffer. +remove_buf_file() must also be defined; it's responsible for deleting +the file(s) created in create_buf_file() and is called during +relay_close(). + +Here are some typical definitions for these callbacks, in this case +using debugfs:: + + /* + * create_buf_file() callback. Creates relay file in debugfs. + */ + static struct dentry *create_buf_file_handler(const char *filename, + struct dentry *parent, + umode_t mode, + struct rchan_buf *buf, + int *is_global) + { + return debugfs_create_file(filename, mode, parent, buf, + &relay_file_operations); + } + + /* + * remove_buf_file() callback. Removes relay file from debugfs. + */ + static int remove_buf_file_handler(struct dentry *dentry) + { + debugfs_remove(dentry); + + return 0; + } + + /* + * relay interface callbacks + */ + static struct rchan_callbacks relay_callbacks = + { + .create_buf_file = create_buf_file_handler, + .remove_buf_file = remove_buf_file_handler, + }; + +And an example relay_open() invocation using them:: + + chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks, NULL); + +If the create_buf_file() callback fails, or isn't defined, channel +creation and thus relay_open() will fail. + +The total size of each per-cpu buffer is calculated by multiplying the +number of sub-buffers by the sub-buffer size passed into relay_open(). +The idea behind sub-buffers is that they're basically an extension of +double-buffering to N buffers, and they also allow applications to +easily implement random-access-on-buffer-boundary schemes, which can +be important for some high-volume applications. The number and size +of sub-buffers is completely dependent on the application and even for +the same application, different conditions will warrant different +values for these parameters at different times. Typically, the right +values to use are best decided after some experimentation; in general, +though, it's safe to assume that having only 1 sub-buffer is a bad +idea - you're guaranteed to either overwrite data or lose events +depending on the channel mode being used. + +The create_buf_file() implementation can also be defined in such a way +as to allow the creation of a single 'global' buffer instead of the +default per-cpu set. This can be useful for applications interested +mainly in seeing the relative ordering of system-wide events without +the need to bother with saving explicit timestamps for the purpose of +merging/sorting per-cpu files in a postprocessing step. + +To have relay_open() create a global buffer, the create_buf_file() +implementation should set the value of the is_global outparam to a +non-zero value in addition to creating the file that will be used to +represent the single buffer. In the case of a global buffer, +create_buf_file() and remove_buf_file() will be called only once. The +normal channel-writing functions, e.g. relay_write(), can still be +used - writes from any cpu will transparently end up in the global +buffer - but since it is a global buffer, callers should make sure +they use the proper locking for such a buffer, either by wrapping +writes in a spinlock, or by copying a write function from relay.h and +creating a local version that internally does the proper locking. + +The private_data passed into relay_open() allows clients to associate +user-defined data with a channel, and is immediately available +(including in create_buf_file()) via chan->private_data or +buf->chan->private_data. + +Buffer-only channels +-------------------- + +These channels have no files associated and can be created with +relay_open(NULL, NULL, ...). Such channels are useful in scenarios such +as when doing early tracing in the kernel, before the VFS is up. In these +cases, one may open a buffer-only channel and then call +relay_late_setup_files() when the kernel is ready to handle files, +to expose the buffered data to the userspace. + +Channel 'modes' +--------------- + +relay channels can be used in either of two modes - 'overwrite' or +'no-overwrite'. The mode is entirely determined by the implementation +of the subbuf_start() callback, as described below. The default if no +subbuf_start() callback is defined is 'no-overwrite' mode. If the +default mode suits your needs, and you plan to use the read() +interface to retrieve channel data, you can ignore the details of this +section, as it pertains mainly to mmap() implementations. + +In 'overwrite' mode, also known as 'flight recorder' mode, writes +continuously cycle around the buffer and will never fail, but will +unconditionally overwrite old data regardless of whether it's actually +been consumed. In no-overwrite mode, writes will fail, i.e. data will +be lost, if the number of unconsumed sub-buffers equals the total +number of sub-buffers in the channel. It should be clear that if +there is no consumer or if the consumer can't consume sub-buffers fast +enough, data will be lost in either case; the only difference is +whether data is lost from the beginning or the end of a buffer. + +As explained above, a relay channel is made of up one or more +per-cpu channel buffers, each implemented as a circular buffer +subdivided into one or more sub-buffers. Messages are written into +the current sub-buffer of the channel's current per-cpu buffer via the +write functions described below. Whenever a message can't fit into +the current sub-buffer, because there's no room left for it, the +client is notified via the subbuf_start() callback that a switch to a +new sub-buffer is about to occur. The client uses this callback to 1) +initialize the next sub-buffer if appropriate 2) finalize the previous +sub-buffer if appropriate and 3) return a boolean value indicating +whether or not to actually move on to the next sub-buffer. + +To implement 'no-overwrite' mode, the userspace client would provide +an implementation of the subbuf_start() callback something like the +following:: + + static int subbuf_start(struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + unsigned int prev_padding) + { + if (prev_subbuf) + *((unsigned *)prev_subbuf) = prev_padding; + + if (relay_buf_full(buf)) + return 0; + + subbuf_start_reserve(buf, sizeof(unsigned int)); + + return 1; + } + +If the current buffer is full, i.e. all sub-buffers remain unconsumed, +the callback returns 0 to indicate that the buffer switch should not +occur yet, i.e. until the consumer has had a chance to read the +current set of ready sub-buffers. For the relay_buf_full() function +to make sense, the consumer is responsible for notifying the relay +interface when sub-buffers have been consumed via +relay_subbufs_consumed(). Any subsequent attempts to write into the +buffer will again invoke the subbuf_start() callback with the same +parameters; only when the consumer has consumed one or more of the +ready sub-buffers will relay_buf_full() return 0, in which case the +buffer switch can continue. + +The implementation of the subbuf_start() callback for 'overwrite' mode +would be very similar:: + + static int subbuf_start(struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + size_t prev_padding) + { + if (prev_subbuf) + *((unsigned *)prev_subbuf) = prev_padding; + + subbuf_start_reserve(buf, sizeof(unsigned int)); + + return 1; + } + +In this case, the relay_buf_full() check is meaningless and the +callback always returns 1, causing the buffer switch to occur +unconditionally. It's also meaningless for the client to use the +relay_subbufs_consumed() function in this mode, as it's never +consulted. + +The default subbuf_start() implementation, used if the client doesn't +define any callbacks, or doesn't define the subbuf_start() callback, +implements the simplest possible 'no-overwrite' mode, i.e. it does +nothing but return 0. + +Header information can be reserved at the beginning of each sub-buffer +by calling the subbuf_start_reserve() helper function from within the +subbuf_start() callback. This reserved area can be used to store +whatever information the client wants. In the example above, room is +reserved in each sub-buffer to store the padding count for that +sub-buffer. This is filled in for the previous sub-buffer in the +subbuf_start() implementation; the padding value for the previous +sub-buffer is passed into the subbuf_start() callback along with a +pointer to the previous sub-buffer, since the padding value isn't +known until a sub-buffer is filled. The subbuf_start() callback is +also called for the first sub-buffer when the channel is opened, to +give the client a chance to reserve space in it. In this case the +previous sub-buffer pointer passed into the callback will be NULL, so +the client should check the value of the prev_subbuf pointer before +writing into the previous sub-buffer. + +Writing to a channel +-------------------- + +Kernel clients write data into the current cpu's channel buffer using +relay_write() or __relay_write(). relay_write() is the main logging +function - it uses local_irqsave() to protect the buffer and should be +used if you might be logging from interrupt context. If you know +you'll never be logging from interrupt context, you can use +__relay_write(), which only disables preemption. These functions +don't return a value, so you can't determine whether or not they +failed - the assumption is that you wouldn't want to check a return +value in the fast logging path anyway, and that they'll always succeed +unless the buffer is full and no-overwrite mode is being used, in +which case you can detect a failed write in the subbuf_start() +callback by calling the relay_buf_full() helper function. + +relay_reserve() is used to reserve a slot in a channel buffer which +can be written to later. This would typically be used in applications +that need to write directly into a channel buffer without having to +stage data in a temporary buffer beforehand. Because the actual write +may not happen immediately after the slot is reserved, applications +using relay_reserve() can keep a count of the number of bytes actually +written, either in space reserved in the sub-buffers themselves or as +a separate array. See the 'reserve' example in the relay-apps tarball +at http://relayfs.sourceforge.net for an example of how this can be +done. Because the write is under control of the client and is +separated from the reserve, relay_reserve() doesn't protect the buffer +at all - it's up to the client to provide the appropriate +synchronization when using relay_reserve(). + +Closing a channel +----------------- + +The client calls relay_close() when it's finished using the channel. +The channel and its associated buffers are destroyed when there are no +longer any references to any of the channel buffers. relay_flush() +forces a sub-buffer switch on all the channel buffers, and can be used +to finalize and process the last sub-buffers before the channel is +closed. + +Misc +---- + +Some applications may want to keep a channel around and re-use it +rather than open and close a new channel for each use. relay_reset() +can be used for this purpose - it resets a channel to its initial +state without reallocating channel buffer memory or destroying +existing mappings. It should however only be called when it's safe to +do so, i.e. when the channel isn't currently being written to. + +Finally, there are a couple of utility callbacks that can be used for +different purposes. buf_mapped() is called whenever a channel buffer +is mmapped from user space and buf_unmapped() is called when it's +unmapped. The client can use this notification to trigger actions +within the kernel application, such as enabling/disabling logging to +the channel. + + +Resources +========= + +For news, example code, mailing list, etc. see the relay interface homepage: + + http://relayfs.sourceforge.net + + +Credits +======= + +The ideas and specs for the relay interface came about as a result of +discussions on tracing involving the following: + +Michel Dagenais +Richard Moore +Bob Wisniewski +Karim Yaghmour +Tom Zanussi + +Also thanks to Hubertus Franke for a lot of useful suggestions and bug +reports. diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt deleted file mode 100644 index cd709a94d054..000000000000 --- a/Documentation/filesystems/relay.txt +++ /dev/null @@ -1,494 +0,0 @@ -relay interface (formerly relayfs) -================================== - -The relay interface provides a means for kernel applications to -efficiently log and transfer large quantities of data from the kernel -to userspace via user-defined 'relay channels'. - -A 'relay channel' is a kernel->user data relay mechanism implemented -as a set of per-cpu kernel buffers ('channel buffers'), each -represented as a regular file ('relay file') in user space. Kernel -clients write into the channel buffers using efficient write -functions; these automatically log into the current cpu's channel -buffer. User space applications mmap() or read() from the relay files -and retrieve the data as it becomes available. The relay files -themselves are files created in a host filesystem, e.g. debugfs, and -are associated with the channel buffers using the API described below. - -The format of the data logged into the channel buffers is completely -up to the kernel client; the relay interface does however provide -hooks which allow kernel clients to impose some structure on the -buffer data. The relay interface doesn't implement any form of data -filtering - this also is left to the kernel client. The purpose is to -keep things as simple as possible. - -This document provides an overview of the relay interface API. The -details of the function parameters are documented along with the -functions in the relay interface code - please see that for details. - -Semantics -========= - -Each relay channel has one buffer per CPU, each buffer has one or more -sub-buffers. Messages are written to the first sub-buffer until it is -too full to contain a new message, in which case it is written to -the next (if available). Messages are never split across sub-buffers. -At this point, userspace can be notified so it empties the first -sub-buffer, while the kernel continues writing to the next. - -When notified that a sub-buffer is full, the kernel knows how many -bytes of it are padding i.e. unused space occurring because a complete -message couldn't fit into a sub-buffer. Userspace can use this -knowledge to copy only valid data. - -After copying it, userspace can notify the kernel that a sub-buffer -has been consumed. - -A relay channel can operate in a mode where it will overwrite data not -yet collected by userspace, and not wait for it to be consumed. - -The relay channel itself does not provide for communication of such -data between userspace and kernel, allowing the kernel side to remain -simple and not impose a single interface on userspace. It does -provide a set of examples and a separate helper though, described -below. - -The read() interface both removes padding and internally consumes the -read sub-buffers; thus in cases where read(2) is being used to drain -the channel buffers, special-purpose communication between kernel and -user isn't necessary for basic operation. - -One of the major goals of the relay interface is to provide a low -overhead mechanism for conveying kernel data to userspace. While the -read() interface is easy to use, it's not as efficient as the mmap() -approach; the example code attempts to make the tradeoff between the -two approaches as small as possible. - -klog and relay-apps example code -================================ - -The relay interface itself is ready to use, but to make things easier, -a couple simple utility functions and a set of examples are provided. - -The relay-apps example tarball, available on the relay sourceforge -site, contains a set of self-contained examples, each consisting of a -pair of .c files containing boilerplate code for each of the user and -kernel sides of a relay application. When combined these two sets of -boilerplate code provide glue to easily stream data to disk, without -having to bother with mundane housekeeping chores. - -The 'klog debugging functions' patch (klog.patch in the relay-apps -tarball) provides a couple of high-level logging functions to the -kernel which allow writing formatted text or raw data to a channel, -regardless of whether a channel to write into exists or not, or even -whether the relay interface is compiled into the kernel or not. These -functions allow you to put unconditional 'trace' statements anywhere -in the kernel or kernel modules; only when there is a 'klog handler' -registered will data actually be logged (see the klog and kleak -examples for details). - -It is of course possible to use the relay interface from scratch, -i.e. without using any of the relay-apps example code or klog, but -you'll have to implement communication between userspace and kernel, -allowing both to convey the state of buffers (full, empty, amount of -padding). The read() interface both removes padding and internally -consumes the read sub-buffers; thus in cases where read(2) is being -used to drain the channel buffers, special-purpose communication -between kernel and user isn't necessary for basic operation. Things -such as buffer-full conditions would still need to be communicated via -some channel though. - -klog and the relay-apps examples can be found in the relay-apps -tarball on http://relayfs.sourceforge.net - -The relay interface user space API -================================== - -The relay interface implements basic file operations for user space -access to relay channel buffer data. Here are the file operations -that are available and some comments regarding their behavior: - -open() enables user to open an _existing_ channel buffer. - -mmap() results in channel buffer being mapped into the caller's - memory space. Note that you can't do a partial mmap - you - must map the entire file, which is NRBUF * SUBBUFSIZE. - -read() read the contents of a channel buffer. The bytes read are - 'consumed' by the reader, i.e. they won't be available - again to subsequent reads. If the channel is being used - in no-overwrite mode (the default), it can be read at any - time even if there's an active kernel writer. If the - channel is being used in overwrite mode and there are - active channel writers, results may be unpredictable - - users should make sure that all logging to the channel has - ended before using read() with overwrite mode. Sub-buffer - padding is automatically removed and will not be seen by - the reader. - -sendfile() transfer data from a channel buffer to an output file - descriptor. Sub-buffer padding is automatically removed - and will not be seen by the reader. - -poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are - notified when sub-buffer boundaries are crossed. - -close() decrements the channel buffer's refcount. When the refcount - reaches 0, i.e. when no process or kernel client has the - buffer open, the channel buffer is freed. - -In order for a user application to make use of relay files, the -host filesystem must be mounted. For example, - - mount -t debugfs debugfs /sys/kernel/debug - -NOTE: the host filesystem doesn't need to be mounted for kernel - clients to create or use channels - it only needs to be - mounted when user space applications need access to the buffer - data. - - -The relay interface kernel API -============================== - -Here's a summary of the API the relay interface provides to in-kernel clients: - -TBD(curr. line MT:/API/) - channel management functions: - - relay_open(base_filename, parent, subbuf_size, n_subbufs, - callbacks, private_data) - relay_close(chan) - relay_flush(chan) - relay_reset(chan) - - channel management typically called on instigation of userspace: - - relay_subbufs_consumed(chan, cpu, subbufs_consumed) - - write functions: - - relay_write(chan, data, length) - __relay_write(chan, data, length) - relay_reserve(chan, length) - - callbacks: - - subbuf_start(buf, subbuf, prev_subbuf, prev_padding) - buf_mapped(buf, filp) - buf_unmapped(buf, filp) - create_buf_file(filename, parent, mode, buf, is_global) - remove_buf_file(dentry) - - helper functions: - - relay_buf_full(buf) - subbuf_start_reserve(buf, length) - - -Creating a channel ------------------- - -relay_open() is used to create a channel, along with its per-cpu -channel buffers. Each channel buffer will have an associated file -created for it in the host filesystem, which can be and mmapped or -read from in user space. The files are named basename0...basenameN-1 -where N is the number of online cpus, and by default will be created -in the root of the filesystem (if the parent param is NULL). If you -want a directory structure to contain your relay files, you should -create it using the host filesystem's directory creation function, -e.g. debugfs_create_dir(), and pass the parent directory to -relay_open(). Users are responsible for cleaning up any directory -structure they create, when the channel is closed - again the host -filesystem's directory removal functions should be used for that, -e.g. debugfs_remove(). - -In order for a channel to be created and the host filesystem's files -associated with its channel buffers, the user must provide definitions -for two callback functions, create_buf_file() and remove_buf_file(). -create_buf_file() is called once for each per-cpu buffer from -relay_open() and allows the user to create the file which will be used -to represent the corresponding channel buffer. The callback should -return the dentry of the file created to represent the channel buffer. -remove_buf_file() must also be defined; it's responsible for deleting -the file(s) created in create_buf_file() and is called during -relay_close(). - -Here are some typical definitions for these callbacks, in this case -using debugfs: - -/* - * create_buf_file() callback. Creates relay file in debugfs. - */ -static struct dentry *create_buf_file_handler(const char *filename, - struct dentry *parent, - umode_t mode, - struct rchan_buf *buf, - int *is_global) -{ - return debugfs_create_file(filename, mode, parent, buf, - &relay_file_operations); -} - -/* - * remove_buf_file() callback. Removes relay file from debugfs. - */ -static int remove_buf_file_handler(struct dentry *dentry) -{ - debugfs_remove(dentry); - - return 0; -} - -/* - * relay interface callbacks - */ -static struct rchan_callbacks relay_callbacks = -{ - .create_buf_file = create_buf_file_handler, - .remove_buf_file = remove_buf_file_handler, -}; - -And an example relay_open() invocation using them: - - chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks, NULL); - -If the create_buf_file() callback fails, or isn't defined, channel -creation and thus relay_open() will fail. - -The total size of each per-cpu buffer is calculated by multiplying the -number of sub-buffers by the sub-buffer size passed into relay_open(). -The idea behind sub-buffers is that they're basically an extension of -double-buffering to N buffers, and they also allow applications to -easily implement random-access-on-buffer-boundary schemes, which can -be important for some high-volume applications. The number and size -of sub-buffers is completely dependent on the application and even for -the same application, different conditions will warrant different -values for these parameters at different times. Typically, the right -values to use are best decided after some experimentation; in general, -though, it's safe to assume that having only 1 sub-buffer is a bad -idea - you're guaranteed to either overwrite data or lose events -depending on the channel mode being used. - -The create_buf_file() implementation can also be defined in such a way -as to allow the creation of a single 'global' buffer instead of the -default per-cpu set. This can be useful for applications interested -mainly in seeing the relative ordering of system-wide events without -the need to bother with saving explicit timestamps for the purpose of -merging/sorting per-cpu files in a postprocessing step. - -To have relay_open() create a global buffer, the create_buf_file() -implementation should set the value of the is_global outparam to a -non-zero value in addition to creating the file that will be used to -represent the single buffer. In the case of a global buffer, -create_buf_file() and remove_buf_file() will be called only once. The -normal channel-writing functions, e.g. relay_write(), can still be -used - writes from any cpu will transparently end up in the global -buffer - but since it is a global buffer, callers should make sure -they use the proper locking for such a buffer, either by wrapping -writes in a spinlock, or by copying a write function from relay.h and -creating a local version that internally does the proper locking. - -The private_data passed into relay_open() allows clients to associate -user-defined data with a channel, and is immediately available -(including in create_buf_file()) via chan->private_data or -buf->chan->private_data. - -Buffer-only channels --------------------- - -These channels have no files associated and can be created with -relay_open(NULL, NULL, ...). Such channels are useful in scenarios such -as when doing early tracing in the kernel, before the VFS is up. In these -cases, one may open a buffer-only channel and then call -relay_late_setup_files() when the kernel is ready to handle files, -to expose the buffered data to the userspace. - -Channel 'modes' ---------------- - -relay channels can be used in either of two modes - 'overwrite' or -'no-overwrite'. The mode is entirely determined by the implementation -of the subbuf_start() callback, as described below. The default if no -subbuf_start() callback is defined is 'no-overwrite' mode. If the -default mode suits your needs, and you plan to use the read() -interface to retrieve channel data, you can ignore the details of this -section, as it pertains mainly to mmap() implementations. - -In 'overwrite' mode, also known as 'flight recorder' mode, writes -continuously cycle around the buffer and will never fail, but will -unconditionally overwrite old data regardless of whether it's actually -been consumed. In no-overwrite mode, writes will fail, i.e. data will -be lost, if the number of unconsumed sub-buffers equals the total -number of sub-buffers in the channel. It should be clear that if -there is no consumer or if the consumer can't consume sub-buffers fast -enough, data will be lost in either case; the only difference is -whether data is lost from the beginning or the end of a buffer. - -As explained above, a relay channel is made of up one or more -per-cpu channel buffers, each implemented as a circular buffer -subdivided into one or more sub-buffers. Messages are written into -the current sub-buffer of the channel's current per-cpu buffer via the -write functions described below. Whenever a message can't fit into -the current sub-buffer, because there's no room left for it, the -client is notified via the subbuf_start() callback that a switch to a -new sub-buffer is about to occur. The client uses this callback to 1) -initialize the next sub-buffer if appropriate 2) finalize the previous -sub-buffer if appropriate and 3) return a boolean value indicating -whether or not to actually move on to the next sub-buffer. - -To implement 'no-overwrite' mode, the userspace client would provide -an implementation of the subbuf_start() callback something like the -following: - -static int subbuf_start(struct rchan_buf *buf, - void *subbuf, - void *prev_subbuf, - unsigned int prev_padding) -{ - if (prev_subbuf) - *((unsigned *)prev_subbuf) = prev_padding; - - if (relay_buf_full(buf)) - return 0; - - subbuf_start_reserve(buf, sizeof(unsigned int)); - - return 1; -} - -If the current buffer is full, i.e. all sub-buffers remain unconsumed, -the callback returns 0 to indicate that the buffer switch should not -occur yet, i.e. until the consumer has had a chance to read the -current set of ready sub-buffers. For the relay_buf_full() function -to make sense, the consumer is responsible for notifying the relay -interface when sub-buffers have been consumed via -relay_subbufs_consumed(). Any subsequent attempts to write into the -buffer will again invoke the subbuf_start() callback with the same -parameters; only when the consumer has consumed one or more of the -ready sub-buffers will relay_buf_full() return 0, in which case the -buffer switch can continue. - -The implementation of the subbuf_start() callback for 'overwrite' mode -would be very similar: - -static int subbuf_start(struct rchan_buf *buf, - void *subbuf, - void *prev_subbuf, - size_t prev_padding) -{ - if (prev_subbuf) - *((unsigned *)prev_subbuf) = prev_padding; - - subbuf_start_reserve(buf, sizeof(unsigned int)); - - return 1; -} - -In this case, the relay_buf_full() check is meaningless and the -callback always returns 1, causing the buffer switch to occur -unconditionally. It's also meaningless for the client to use the -relay_subbufs_consumed() function in this mode, as it's never -consulted. - -The default subbuf_start() implementation, used if the client doesn't -define any callbacks, or doesn't define the subbuf_start() callback, -implements the simplest possible 'no-overwrite' mode, i.e. it does -nothing but return 0. - -Header information can be reserved at the beginning of each sub-buffer -by calling the subbuf_start_reserve() helper function from within the -subbuf_start() callback. This reserved area can be used to store -whatever information the client wants. In the example above, room is -reserved in each sub-buffer to store the padding count for that -sub-buffer. This is filled in for the previous sub-buffer in the -subbuf_start() implementation; the padding value for the previous -sub-buffer is passed into the subbuf_start() callback along with a -pointer to the previous sub-buffer, since the padding value isn't -known until a sub-buffer is filled. The subbuf_start() callback is -also called for the first sub-buffer when the channel is opened, to -give the client a chance to reserve space in it. In this case the -previous sub-buffer pointer passed into the callback will be NULL, so -the client should check the value of the prev_subbuf pointer before -writing into the previous sub-buffer. - -Writing to a channel --------------------- - -Kernel clients write data into the current cpu's channel buffer using -relay_write() or __relay_write(). relay_write() is the main logging -function - it uses local_irqsave() to protect the buffer and should be -used if you might be logging from interrupt context. If you know -you'll never be logging from interrupt context, you can use -__relay_write(), which only disables preemption. These functions -don't return a value, so you can't determine whether or not they -failed - the assumption is that you wouldn't want to check a return -value in the fast logging path anyway, and that they'll always succeed -unless the buffer is full and no-overwrite mode is being used, in -which case you can detect a failed write in the subbuf_start() -callback by calling the relay_buf_full() helper function. - -relay_reserve() is used to reserve a slot in a channel buffer which -can be written to later. This would typically be used in applications -that need to write directly into a channel buffer without having to -stage data in a temporary buffer beforehand. Because the actual write -may not happen immediately after the slot is reserved, applications -using relay_reserve() can keep a count of the number of bytes actually -written, either in space reserved in the sub-buffers themselves or as -a separate array. See the 'reserve' example in the relay-apps tarball -at http://relayfs.sourceforge.net for an example of how this can be -done. Because the write is under control of the client and is -separated from the reserve, relay_reserve() doesn't protect the buffer -at all - it's up to the client to provide the appropriate -synchronization when using relay_reserve(). - -Closing a channel ------------------ - -The client calls relay_close() when it's finished using the channel. -The channel and its associated buffers are destroyed when there are no -longer any references to any of the channel buffers. relay_flush() -forces a sub-buffer switch on all the channel buffers, and can be used -to finalize and process the last sub-buffers before the channel is -closed. - -Misc ----- - -Some applications may want to keep a channel around and re-use it -rather than open and close a new channel for each use. relay_reset() -can be used for this purpose - it resets a channel to its initial -state without reallocating channel buffer memory or destroying -existing mappings. It should however only be called when it's safe to -do so, i.e. when the channel isn't currently being written to. - -Finally, there are a couple of utility callbacks that can be used for -different purposes. buf_mapped() is called whenever a channel buffer -is mmapped from user space and buf_unmapped() is called when it's -unmapped. The client can use this notification to trigger actions -within the kernel application, such as enabling/disabling logging to -the channel. - - -Resources -========= - -For news, example code, mailing list, etc. see the relay interface homepage: - - http://relayfs.sourceforge.net - - -Credits -======= - -The ideas and specs for the relay interface came about as a result of -discussions on tracing involving the following: - -Michel Dagenais -Richard Moore -Bob Wisniewski -Karim Yaghmour -Tom Zanussi - -Also thanks to Hubertus Franke for a lot of useful suggestions and bug -reports. -- cgit v1.2.3 From 6db0a480aa07ab65b6c7d34d095c714359af3e87 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:22 +0100 Subject: docs: filesystems: convert romfs.txt to ReST - Add a SPDX header; - Add a document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/d2cc83e7cd6de63c793ccd3f2588ea40f7f1e764.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/romfs.rst | 194 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/romfs.txt | 186 ---------------------------------- 3 files changed, 195 insertions(+), 186 deletions(-) create mode 100644 Documentation/filesystems/romfs.rst delete mode 100644 Documentation/filesystems/romfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 0aade8146d4d..3b26639517af 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -85,5 +85,6 @@ Documentation for filesystem implementations. qnx6 ramfs-rootfs-initramfs relay + romfs virtiofs vfat diff --git a/Documentation/filesystems/romfs.rst b/Documentation/filesystems/romfs.rst new file mode 100644 index 000000000000..465b11efa9be --- /dev/null +++ b/Documentation/filesystems/romfs.rst @@ -0,0 +1,194 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +ROMFS - ROM File System +======================= + +This is a quite dumb, read only filesystem, mainly for initial RAM +disks of installation disks. It has grown up by the need of having +modules linked at boot time. Using this filesystem, you get a very +similar feature, and even the possibility of a small kernel, with a +file system which doesn't take up useful memory from the router +functions in the basement of your office. + +For comparison, both the older minix and xiafs (the latter is now +defunct) filesystems, compiled as module need more than 20000 bytes, +while romfs is less than a page, about 4000 bytes (assuming i586 +code). Under the same conditions, the msdos filesystem would need +about 30K (and does not support device nodes or symlinks), while the +nfs module with nfsroot is about 57K. Furthermore, as a bit unfair +comparison, an actual rescue disk used up 3202 blocks with ext2, while +with romfs, it needed 3079 blocks. + +To create such a file system, you'll need a user program named +genromfs. It is available on http://romfs.sourceforge.net/ + +As the name suggests, romfs could be also used (space-efficiently) on +various read-only media, like (E)EPROM disks if someone will have the +motivation.. :) + +However, the main purpose of romfs is to have a very small kernel, +which has only this filesystem linked in, and then can load any module +later, with the current module utilities. It can also be used to run +some program to decide if you need SCSI devices, and even IDE or +floppy drives can be loaded later if you use the "initrd"--initial +RAM disk--feature of the kernel. This would not be really news +flash, but with romfs, you can even spare off your ext2 or minix or +maybe even affs filesystem until you really know that you need it. + +For example, a distribution boot disk can contain only the cd disk +drivers (and possibly the SCSI drivers), and the ISO 9660 filesystem +module. The kernel can be small enough, since it doesn't have other +filesystems, like the quite large ext2fs module, which can then be +loaded off the CD at a later stage of the installation. Another use +would be for a recovery disk, when you are reinstalling a workstation +from the network, and you will have all the tools/modules available +from a nearby server, so you don't want to carry two disks for this +purpose, just because it won't fit into ext2. + +romfs operates on block devices as you can expect, and the underlying +structure is very simple. Every accessible structure begins on 16 +byte boundaries for fast access. The minimum space a file will take +is 32 bytes (this is an empty file, with a less than 16 character +name). The maximum overhead for any non-empty file is the header, and +the 16 byte padding for the name and the contents, also 16+14+15 = 45 +bytes. This is quite rare however, since most file names are longer +than 3 bytes, and shorter than 15 bytes. + +The layout of the filesystem is the following:: + + offset content + + +---+---+---+---+ + 0 | - | r | o | m | \ + +---+---+---+---+ The ASCII representation of those bytes + 4 | 1 | f | s | - | / (i.e. "-rom1fs-") + +---+---+---+---+ + 8 | full size | The number of accessible bytes in this fs. + +---+---+---+---+ + 12 | checksum | The checksum of the FIRST 512 BYTES. + +---+---+---+---+ + 16 | volume name | The zero terminated name of the volume, + : : padded to 16 byte boundary. + +---+---+---+---+ + xx | file | + : headers : + +Every multi byte value (32 bit words, I'll use the longwords term from +now on) must be in big endian order. + +The first eight bytes identify the filesystem, even for the casual +inspector. After that, in the 3rd longword, it contains the number of +bytes accessible from the start of this filesystem. The 4th longword +is the checksum of the first 512 bytes (or the number of bytes +accessible, whichever is smaller). The applied algorithm is the same +as in the AFFS filesystem, namely a simple sum of the longwords +(assuming bigendian quantities again). For details, please consult +the source. This algorithm was chosen because although it's not quite +reliable, it does not require any tables, and it is very simple. + +The following bytes are now part of the file system; each file header +must begin on a 16 byte boundary:: + + offset content + + +---+---+---+---+ + 0 | next filehdr|X| The offset of the next file header + +---+---+---+---+ (zero if no more files) + 4 | spec.info | Info for directories/hard links/devices + +---+---+---+---+ + 8 | size | The size of this file in bytes + +---+---+---+---+ + 12 | checksum | Covering the meta data, including the file + +---+---+---+---+ name, and padding + 16 | file name | The zero terminated name of the file, + : : padded to 16 byte boundary + +---+---+---+---+ + xx | file data | + : : + +Since the file headers begin always at a 16 byte boundary, the lowest +4 bits would be always zero in the next filehdr pointer. These four +bits are used for the mode information. Bits 0..2 specify the type of +the file; while bit 4 shows if the file is executable or not. The +permissions are assumed to be world readable, if this bit is not set, +and world executable if it is; except the character and block devices, +they are never accessible for other than owner. The owner of every +file is user and group 0, this should never be a problem for the +intended use. The mapping of the 8 possible values to file types is +the following: + +== =============== ============================================ + mapping spec.info means +== =============== ============================================ + 0 hard link link destination [file header] + 1 directory first file's header + 2 regular file unused, must be zero [MBZ] + 3 symbolic link unused, MBZ (file data is the link content) + 4 block device 16/16 bits major/minor number + 5 char device - " - + 6 socket unused, MBZ + 7 fifo unused, MBZ +== =============== ============================================ + +Note that hard links are specifically marked in this filesystem, but +they will behave as you can expect (i.e. share the inode number). +Note also that it is your responsibility to not create hard link +loops, and creating all the . and .. links for directories. This is +normally done correctly by the genromfs program. Please refrain from +using the executable bits for special purposes on the socket and fifo +special files, they may have other uses in the future. Additionally, +please remember that only regular files, and symlinks are supposed to +have a nonzero size field; they contain the number of bytes available +directly after the (padded) file name. + +Another thing to note is that romfs works on file headers and data +aligned to 16 byte boundaries, but most hardware devices and the block +device drivers are unable to cope with smaller than block-sized data. +To overcome this limitation, the whole size of the file system must be +padded to an 1024 byte boundary. + +If you have any problems or suggestions concerning this file system, +please contact me. However, think twice before wanting me to add +features and code, because the primary and most important advantage of +this file system is the small code. On the other hand, don't be +alarmed, I'm not getting that much romfs related mail. Now I can +understand why Avery wrote poems in the ARCnet docs to get some more +feedback. :) + +romfs has also a mailing list, and to date, it hasn't received any +traffic, so you are welcome to join it to discuss your ideas. :) + +It's run by ezmlm, so you can subscribe to it by sending a message +to romfs-subscribe@shadow.banki.hu, the content is irrelevant. + +Pending issues: + +- Permissions and owner information are pretty essential features of a + Un*x like system, but romfs does not provide the full possibilities. + I have never found this limiting, but others might. + +- The file system is read only, so it can be very small, but in case + one would want to write _anything_ to a file system, he still needs + a writable file system, thus negating the size advantages. Possible + solutions: implement write access as a compile-time option, or a new, + similarly small writable filesystem for RAM disks. + +- Since the files are only required to have alignment on a 16 byte + boundary, it is currently possibly suboptimal to read or execute files + from the filesystem. It might be resolved by reordering file data to + have most of it (i.e. except the start and the end) laying at "natural" + boundaries, thus it would be possible to directly map a big portion of + the file contents to the mm subsystem. + +- Compression might be an useful feature, but memory is quite a + limiting factor in my eyes. + +- Where it is used? + +- Does it work on other architectures than intel and motorola? + + +Have fun, + +Janos Farkas diff --git a/Documentation/filesystems/romfs.txt b/Documentation/filesystems/romfs.txt deleted file mode 100644 index e2b07cc9120a..000000000000 --- a/Documentation/filesystems/romfs.txt +++ /dev/null @@ -1,186 +0,0 @@ -ROMFS - ROM FILE SYSTEM - -This is a quite dumb, read only filesystem, mainly for initial RAM -disks of installation disks. It has grown up by the need of having -modules linked at boot time. Using this filesystem, you get a very -similar feature, and even the possibility of a small kernel, with a -file system which doesn't take up useful memory from the router -functions in the basement of your office. - -For comparison, both the older minix and xiafs (the latter is now -defunct) filesystems, compiled as module need more than 20000 bytes, -while romfs is less than a page, about 4000 bytes (assuming i586 -code). Under the same conditions, the msdos filesystem would need -about 30K (and does not support device nodes or symlinks), while the -nfs module with nfsroot is about 57K. Furthermore, as a bit unfair -comparison, an actual rescue disk used up 3202 blocks with ext2, while -with romfs, it needed 3079 blocks. - -To create such a file system, you'll need a user program named -genromfs. It is available on http://romfs.sourceforge.net/ - -As the name suggests, romfs could be also used (space-efficiently) on -various read-only media, like (E)EPROM disks if someone will have the -motivation.. :) - -However, the main purpose of romfs is to have a very small kernel, -which has only this filesystem linked in, and then can load any module -later, with the current module utilities. It can also be used to run -some program to decide if you need SCSI devices, and even IDE or -floppy drives can be loaded later if you use the "initrd"--initial -RAM disk--feature of the kernel. This would not be really news -flash, but with romfs, you can even spare off your ext2 or minix or -maybe even affs filesystem until you really know that you need it. - -For example, a distribution boot disk can contain only the cd disk -drivers (and possibly the SCSI drivers), and the ISO 9660 filesystem -module. The kernel can be small enough, since it doesn't have other -filesystems, like the quite large ext2fs module, which can then be -loaded off the CD at a later stage of the installation. Another use -would be for a recovery disk, when you are reinstalling a workstation -from the network, and you will have all the tools/modules available -from a nearby server, so you don't want to carry two disks for this -purpose, just because it won't fit into ext2. - -romfs operates on block devices as you can expect, and the underlying -structure is very simple. Every accessible structure begins on 16 -byte boundaries for fast access. The minimum space a file will take -is 32 bytes (this is an empty file, with a less than 16 character -name). The maximum overhead for any non-empty file is the header, and -the 16 byte padding for the name and the contents, also 16+14+15 = 45 -bytes. This is quite rare however, since most file names are longer -than 3 bytes, and shorter than 15 bytes. - -The layout of the filesystem is the following: - -offset content - - +---+---+---+---+ - 0 | - | r | o | m | \ - +---+---+---+---+ The ASCII representation of those bytes - 4 | 1 | f | s | - | / (i.e. "-rom1fs-") - +---+---+---+---+ - 8 | full size | The number of accessible bytes in this fs. - +---+---+---+---+ - 12 | checksum | The checksum of the FIRST 512 BYTES. - +---+---+---+---+ - 16 | volume name | The zero terminated name of the volume, - : : padded to 16 byte boundary. - +---+---+---+---+ - xx | file | - : headers : - -Every multi byte value (32 bit words, I'll use the longwords term from -now on) must be in big endian order. - -The first eight bytes identify the filesystem, even for the casual -inspector. After that, in the 3rd longword, it contains the number of -bytes accessible from the start of this filesystem. The 4th longword -is the checksum of the first 512 bytes (or the number of bytes -accessible, whichever is smaller). The applied algorithm is the same -as in the AFFS filesystem, namely a simple sum of the longwords -(assuming bigendian quantities again). For details, please consult -the source. This algorithm was chosen because although it's not quite -reliable, it does not require any tables, and it is very simple. - -The following bytes are now part of the file system; each file header -must begin on a 16 byte boundary. - -offset content - - +---+---+---+---+ - 0 | next filehdr|X| The offset of the next file header - +---+---+---+---+ (zero if no more files) - 4 | spec.info | Info for directories/hard links/devices - +---+---+---+---+ - 8 | size | The size of this file in bytes - +---+---+---+---+ - 12 | checksum | Covering the meta data, including the file - +---+---+---+---+ name, and padding - 16 | file name | The zero terminated name of the file, - : : padded to 16 byte boundary - +---+---+---+---+ - xx | file data | - : : - -Since the file headers begin always at a 16 byte boundary, the lowest -4 bits would be always zero in the next filehdr pointer. These four -bits are used for the mode information. Bits 0..2 specify the type of -the file; while bit 4 shows if the file is executable or not. The -permissions are assumed to be world readable, if this bit is not set, -and world executable if it is; except the character and block devices, -they are never accessible for other than owner. The owner of every -file is user and group 0, this should never be a problem for the -intended use. The mapping of the 8 possible values to file types is -the following: - - mapping spec.info means - 0 hard link link destination [file header] - 1 directory first file's header - 2 regular file unused, must be zero [MBZ] - 3 symbolic link unused, MBZ (file data is the link content) - 4 block device 16/16 bits major/minor number - 5 char device - " - - 6 socket unused, MBZ - 7 fifo unused, MBZ - -Note that hard links are specifically marked in this filesystem, but -they will behave as you can expect (i.e. share the inode number). -Note also that it is your responsibility to not create hard link -loops, and creating all the . and .. links for directories. This is -normally done correctly by the genromfs program. Please refrain from -using the executable bits for special purposes on the socket and fifo -special files, they may have other uses in the future. Additionally, -please remember that only regular files, and symlinks are supposed to -have a nonzero size field; they contain the number of bytes available -directly after the (padded) file name. - -Another thing to note is that romfs works on file headers and data -aligned to 16 byte boundaries, but most hardware devices and the block -device drivers are unable to cope with smaller than block-sized data. -To overcome this limitation, the whole size of the file system must be -padded to an 1024 byte boundary. - -If you have any problems or suggestions concerning this file system, -please contact me. However, think twice before wanting me to add -features and code, because the primary and most important advantage of -this file system is the small code. On the other hand, don't be -alarmed, I'm not getting that much romfs related mail. Now I can -understand why Avery wrote poems in the ARCnet docs to get some more -feedback. :) - -romfs has also a mailing list, and to date, it hasn't received any -traffic, so you are welcome to join it to discuss your ideas. :) - -It's run by ezmlm, so you can subscribe to it by sending a message -to romfs-subscribe@shadow.banki.hu, the content is irrelevant. - -Pending issues: - -- Permissions and owner information are pretty essential features of a -Un*x like system, but romfs does not provide the full possibilities. -I have never found this limiting, but others might. - -- The file system is read only, so it can be very small, but in case -one would want to write _anything_ to a file system, he still needs -a writable file system, thus negating the size advantages. Possible -solutions: implement write access as a compile-time option, or a new, -similarly small writable filesystem for RAM disks. - -- Since the files are only required to have alignment on a 16 byte -boundary, it is currently possibly suboptimal to read or execute files -from the filesystem. It might be resolved by reordering file data to -have most of it (i.e. except the start and the end) laying at "natural" -boundaries, thus it would be possible to directly map a big portion of -the file contents to the mm subsystem. - -- Compression might be an useful feature, but memory is quite a -limiting factor in my eyes. - -- Where it is used? - -- Does it work on other architectures than intel and motorola? - - -Have fun, -Janos Farkas -- cgit v1.2.3 From 31771f45c8e46d9356f1a58329f5cd40ab331e1a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:23 +0100 Subject: docs: filesystems: convert squashfs.txt to ReST - Add a SPDX header; - Adjust document and section titles; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/cec30862c7ee7de7f9cd903e35e6c8bf74cc928a.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/squashfs.rst | 265 +++++++++++++++++++++++++++++++++ Documentation/filesystems/squashfs.txt | 259 -------------------------------- 3 files changed, 266 insertions(+), 259 deletions(-) create mode 100644 Documentation/filesystems/squashfs.rst delete mode 100644 Documentation/filesystems/squashfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 3b26639517af..97a5f65ae509 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -86,5 +86,6 @@ Documentation for filesystem implementations. ramfs-rootfs-initramfs relay romfs + squashfs virtiofs vfat diff --git a/Documentation/filesystems/squashfs.rst b/Documentation/filesystems/squashfs.rst new file mode 100644 index 000000000000..df42106bae71 --- /dev/null +++ b/Documentation/filesystems/squashfs.rst @@ -0,0 +1,265 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +Squashfs 4.0 Filesystem +======================= + +Squashfs is a compressed read-only filesystem for Linux. + +It uses zlib, lz4, lzo, or xz compression to compress files, inodes and +directories. Inodes in the system are very small and all blocks are packed to +minimise data overhead. Block sizes greater than 4K are supported up to a +maximum of 1Mbytes (default block size 128K). + +Squashfs is intended for general read-only filesystem use, for archival +use (i.e. in cases where a .tar.gz file may be used), and in constrained +block device/memory systems (e.g. embedded systems) where low overhead is +needed. + +Mailing list: squashfs-devel@lists.sourceforge.net +Web site: www.squashfs.org + +1. Filesystem Features +---------------------- + +Squashfs filesystem features versus Cramfs: + +============================== ========= ========== + Squashfs Cramfs +============================== ========= ========== +Max filesystem size 2^64 256 MiB +Max file size ~ 2 TiB 16 MiB +Max files unlimited unlimited +Max directories unlimited unlimited +Max entries per directory unlimited unlimited +Max block size 1 MiB 4 KiB +Metadata compression yes no +Directory indexes yes no +Sparse file support yes no +Tail-end packing (fragments) yes no +Exportable (NFS etc.) yes no +Hard link support yes no +"." and ".." in readdir yes no +Real inode numbers yes no +32-bit uids/gids yes no +File creation time yes no +Xattr support yes no +ACL support no no +============================== ========= ========== + +Squashfs compresses data, inodes and directories. In addition, inode and +directory data are highly compacted, and packed on byte boundaries. Each +compressed inode is on average 8 bytes in length (the exact length varies on +file type, i.e. regular file, directory, symbolic link, and block/char device +inodes have different sizes). + +2. Using Squashfs +----------------- + +As squashfs is a read-only filesystem, the mksquashfs program must be used to +create populated squashfs filesystems. This and other squashfs utilities +can be obtained from http://www.squashfs.org. Usage instructions can be +obtained from this site also. + +The squashfs-tools development tree is now located on kernel.org + git://git.kernel.org/pub/scm/fs/squashfs/squashfs-tools.git + +3. Squashfs Filesystem Design +----------------------------- + +A squashfs filesystem consists of a maximum of nine parts, packed together on a +byte alignment:: + + --------------- + | superblock | + |---------------| + | compression | + | options | + |---------------| + | datablocks | + | & fragments | + |---------------| + | inode table | + |---------------| + | directory | + | table | + |---------------| + | fragment | + | table | + |---------------| + | export | + | table | + |---------------| + | uid/gid | + | lookup table | + |---------------| + | xattr | + | table | + --------------- + +Compressed data blocks are written to the filesystem as files are read from +the source directory, and checked for duplicates. Once all file data has been +written the completed inode, directory, fragment, export, uid/gid lookup and +xattr tables are written. + +3.1 Compression options +----------------------- + +Compressors can optionally support compression specific options (e.g. +dictionary size). If non-default compression options have been used, then +these are stored here. + +3.2 Inodes +---------- + +Metadata (inodes and directories) are compressed in 8Kbyte blocks. Each +compressed block is prefixed by a two byte length, the top bit is set if the +block is uncompressed. A block will be uncompressed if the -noI option is set, +or if the compressed block was larger than the uncompressed block. + +Inodes are packed into the metadata blocks, and are not aligned to block +boundaries, therefore inodes overlap compressed blocks. Inodes are identified +by a 48-bit number which encodes the location of the compressed metadata block +containing the inode, and the byte offset into that block where the inode is +placed (). + +To maximise compression there are different inodes for each file type +(regular file, directory, device, etc.), the inode contents and length +varying with the type. + +To further maximise compression, two types of regular file inode and +directory inode are defined: inodes optimised for frequently occurring +regular files and directories, and extended types where extra +information has to be stored. + +3.3 Directories +--------------- + +Like inodes, directories are packed into compressed metadata blocks, stored +in a directory table. Directories are accessed using the start address of +the metablock containing the directory and the offset into the +decompressed block (). + +Directories are organised in a slightly complex way, and are not simply +a list of file names. The organisation takes advantage of the +fact that (in most cases) the inodes of the files will be in the same +compressed metadata block, and therefore, can share the start block. +Directories are therefore organised in a two level list, a directory +header containing the shared start block value, and a sequence of directory +entries, each of which share the shared start block. A new directory header +is written once/if the inode start block changes. The directory +header/directory entry list is repeated as many times as necessary. + +Directories are sorted, and can contain a directory index to speed up +file lookup. Directory indexes store one entry per metablock, each entry +storing the index/filename mapping to the first directory header +in each metadata block. Directories are sorted in alphabetical order, +and at lookup the index is scanned linearly looking for the first filename +alphabetically larger than the filename being looked up. At this point the +location of the metadata block the filename is in has been found. +The general idea of the index is to ensure only one metadata block needs to be +decompressed to do a lookup irrespective of the length of the directory. +This scheme has the advantage that it doesn't require extra memory overhead +and doesn't require much extra storage on disk. + +3.4 File data +------------- + +Regular files consist of a sequence of contiguous compressed blocks, and/or a +compressed fragment block (tail-end packed block). The compressed size +of each datablock is stored in a block list contained within the +file inode. + +To speed up access to datablocks when reading 'large' files (256 Mbytes or +larger), the code implements an index cache that caches the mapping from +block index to datablock location on disk. + +The index cache allows Squashfs to handle large files (up to 1.75 TiB) while +retaining a simple and space-efficient block list on disk. The cache +is split into slots, caching up to eight 224 GiB files (128 KiB blocks). +Larger files use multiple slots, with 1.75 TiB files using all 8 slots. +The index cache is designed to be memory efficient, and by default uses +16 KiB. + +3.5 Fragment lookup table +------------------------- + +Regular files can contain a fragment index which is mapped to a fragment +location on disk and compressed size using a fragment lookup table. This +fragment lookup table is itself stored compressed into metadata blocks. +A second index table is used to locate these. This second index table for +speed of access (and because it is small) is read at mount time and cached +in memory. + +3.6 Uid/gid lookup table +------------------------ + +For space efficiency regular files store uid and gid indexes, which are +converted to 32-bit uids/gids using an id look up table. This table is +stored compressed into metadata blocks. A second index table is used to +locate these. This second index table for speed of access (and because it +is small) is read at mount time and cached in memory. + +3.7 Export table +---------------- + +To enable Squashfs filesystems to be exportable (via NFS etc.) filesystems +can optionally (disabled with the -no-exports Mksquashfs option) contain +an inode number to inode disk location lookup table. This is required to +enable Squashfs to map inode numbers passed in filehandles to the inode +location on disk, which is necessary when the export code reinstantiates +expired/flushed inodes. + +This table is stored compressed into metadata blocks. A second index table is +used to locate these. This second index table for speed of access (and because +it is small) is read at mount time and cached in memory. + +3.8 Xattr table +--------------- + +The xattr table contains extended attributes for each inode. The xattrs +for each inode are stored in a list, each list entry containing a type, +name and value field. The type field encodes the xattr prefix +("user.", "trusted." etc) and it also encodes how the name/value fields +should be interpreted. Currently the type indicates whether the value +is stored inline (in which case the value field contains the xattr value), +or if it is stored out of line (in which case the value field stores a +reference to where the actual value is stored). This allows large values +to be stored out of line improving scanning and lookup performance and it +also allows values to be de-duplicated, the value being stored once, and +all other occurrences holding an out of line reference to that value. + +The xattr lists are packed into compressed 8K metadata blocks. +To reduce overhead in inodes, rather than storing the on-disk +location of the xattr list inside each inode, a 32-bit xattr id +is stored. This xattr id is mapped into the location of the xattr +list using a second xattr id lookup table. + +4. TODOs and Outstanding Issues +------------------------------- + +4.1 TODO list +------------- + +Implement ACL support. + +4.2 Squashfs Internal Cache +--------------------------- + +Blocks in Squashfs are compressed. To avoid repeatedly decompressing +recently accessed data Squashfs uses two small metadata and fragment caches. + +The cache is not used for file datablocks, these are decompressed and cached in +the page-cache in the normal way. The cache is used to temporarily cache +fragment and metadata blocks which have been read as a result of a metadata +(i.e. inode or directory) or fragment access. Because metadata and fragments +are packed together into blocks (to gain greater compression) the read of a +particular piece of metadata or fragment will retrieve other metadata/fragments +which have been packed with it, these because of locality-of-reference may be +read in the near future. Temporarily caching them ensures they are available +for near future access without requiring an additional read and decompress. + +In the future this internal cache may be replaced with an implementation which +uses the kernel page cache. Because the page cache operates on page sized +units this may introduce additional complexity in terms of locking and +associated race conditions. diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt deleted file mode 100644 index e5274f84dc56..000000000000 --- a/Documentation/filesystems/squashfs.txt +++ /dev/null @@ -1,259 +0,0 @@ -SQUASHFS 4.0 FILESYSTEM -======================= - -Squashfs is a compressed read-only filesystem for Linux. -It uses zlib, lz4, lzo, or xz compression to compress files, inodes and -directories. Inodes in the system are very small and all blocks are packed to -minimise data overhead. Block sizes greater than 4K are supported up to a -maximum of 1Mbytes (default block size 128K). - -Squashfs is intended for general read-only filesystem use, for archival -use (i.e. in cases where a .tar.gz file may be used), and in constrained -block device/memory systems (e.g. embedded systems) where low overhead is -needed. - -Mailing list: squashfs-devel@lists.sourceforge.net -Web site: www.squashfs.org - -1. FILESYSTEM FEATURES ----------------------- - -Squashfs filesystem features versus Cramfs: - - Squashfs Cramfs - -Max filesystem size: 2^64 256 MiB -Max file size: ~ 2 TiB 16 MiB -Max files: unlimited unlimited -Max directories: unlimited unlimited -Max entries per directory: unlimited unlimited -Max block size: 1 MiB 4 KiB -Metadata compression: yes no -Directory indexes: yes no -Sparse file support: yes no -Tail-end packing (fragments): yes no -Exportable (NFS etc.): yes no -Hard link support: yes no -"." and ".." in readdir: yes no -Real inode numbers: yes no -32-bit uids/gids: yes no -File creation time: yes no -Xattr support: yes no -ACL support: no no - -Squashfs compresses data, inodes and directories. In addition, inode and -directory data are highly compacted, and packed on byte boundaries. Each -compressed inode is on average 8 bytes in length (the exact length varies on -file type, i.e. regular file, directory, symbolic link, and block/char device -inodes have different sizes). - -2. USING SQUASHFS ------------------ - -As squashfs is a read-only filesystem, the mksquashfs program must be used to -create populated squashfs filesystems. This and other squashfs utilities -can be obtained from http://www.squashfs.org. Usage instructions can be -obtained from this site also. - -The squashfs-tools development tree is now located on kernel.org - git://git.kernel.org/pub/scm/fs/squashfs/squashfs-tools.git - -3. SQUASHFS FILESYSTEM DESIGN ------------------------------ - -A squashfs filesystem consists of a maximum of nine parts, packed together on a -byte alignment: - - --------------- - | superblock | - |---------------| - | compression | - | options | - |---------------| - | datablocks | - | & fragments | - |---------------| - | inode table | - |---------------| - | directory | - | table | - |---------------| - | fragment | - | table | - |---------------| - | export | - | table | - |---------------| - | uid/gid | - | lookup table | - |---------------| - | xattr | - | table | - --------------- - -Compressed data blocks are written to the filesystem as files are read from -the source directory, and checked for duplicates. Once all file data has been -written the completed inode, directory, fragment, export, uid/gid lookup and -xattr tables are written. - -3.1 Compression options ------------------------ - -Compressors can optionally support compression specific options (e.g. -dictionary size). If non-default compression options have been used, then -these are stored here. - -3.2 Inodes ----------- - -Metadata (inodes and directories) are compressed in 8Kbyte blocks. Each -compressed block is prefixed by a two byte length, the top bit is set if the -block is uncompressed. A block will be uncompressed if the -noI option is set, -or if the compressed block was larger than the uncompressed block. - -Inodes are packed into the metadata blocks, and are not aligned to block -boundaries, therefore inodes overlap compressed blocks. Inodes are identified -by a 48-bit number which encodes the location of the compressed metadata block -containing the inode, and the byte offset into that block where the inode is -placed (). - -To maximise compression there are different inodes for each file type -(regular file, directory, device, etc.), the inode contents and length -varying with the type. - -To further maximise compression, two types of regular file inode and -directory inode are defined: inodes optimised for frequently occurring -regular files and directories, and extended types where extra -information has to be stored. - -3.3 Directories ---------------- - -Like inodes, directories are packed into compressed metadata blocks, stored -in a directory table. Directories are accessed using the start address of -the metablock containing the directory and the offset into the -decompressed block (). - -Directories are organised in a slightly complex way, and are not simply -a list of file names. The organisation takes advantage of the -fact that (in most cases) the inodes of the files will be in the same -compressed metadata block, and therefore, can share the start block. -Directories are therefore organised in a two level list, a directory -header containing the shared start block value, and a sequence of directory -entries, each of which share the shared start block. A new directory header -is written once/if the inode start block changes. The directory -header/directory entry list is repeated as many times as necessary. - -Directories are sorted, and can contain a directory index to speed up -file lookup. Directory indexes store one entry per metablock, each entry -storing the index/filename mapping to the first directory header -in each metadata block. Directories are sorted in alphabetical order, -and at lookup the index is scanned linearly looking for the first filename -alphabetically larger than the filename being looked up. At this point the -location of the metadata block the filename is in has been found. -The general idea of the index is to ensure only one metadata block needs to be -decompressed to do a lookup irrespective of the length of the directory. -This scheme has the advantage that it doesn't require extra memory overhead -and doesn't require much extra storage on disk. - -3.4 File data -------------- - -Regular files consist of a sequence of contiguous compressed blocks, and/or a -compressed fragment block (tail-end packed block). The compressed size -of each datablock is stored in a block list contained within the -file inode. - -To speed up access to datablocks when reading 'large' files (256 Mbytes or -larger), the code implements an index cache that caches the mapping from -block index to datablock location on disk. - -The index cache allows Squashfs to handle large files (up to 1.75 TiB) while -retaining a simple and space-efficient block list on disk. The cache -is split into slots, caching up to eight 224 GiB files (128 KiB blocks). -Larger files use multiple slots, with 1.75 TiB files using all 8 slots. -The index cache is designed to be memory efficient, and by default uses -16 KiB. - -3.5 Fragment lookup table -------------------------- - -Regular files can contain a fragment index which is mapped to a fragment -location on disk and compressed size using a fragment lookup table. This -fragment lookup table is itself stored compressed into metadata blocks. -A second index table is used to locate these. This second index table for -speed of access (and because it is small) is read at mount time and cached -in memory. - -3.6 Uid/gid lookup table ------------------------- - -For space efficiency regular files store uid and gid indexes, which are -converted to 32-bit uids/gids using an id look up table. This table is -stored compressed into metadata blocks. A second index table is used to -locate these. This second index table for speed of access (and because it -is small) is read at mount time and cached in memory. - -3.7 Export table ----------------- - -To enable Squashfs filesystems to be exportable (via NFS etc.) filesystems -can optionally (disabled with the -no-exports Mksquashfs option) contain -an inode number to inode disk location lookup table. This is required to -enable Squashfs to map inode numbers passed in filehandles to the inode -location on disk, which is necessary when the export code reinstantiates -expired/flushed inodes. - -This table is stored compressed into metadata blocks. A second index table is -used to locate these. This second index table for speed of access (and because -it is small) is read at mount time and cached in memory. - -3.8 Xattr table ---------------- - -The xattr table contains extended attributes for each inode. The xattrs -for each inode are stored in a list, each list entry containing a type, -name and value field. The type field encodes the xattr prefix -("user.", "trusted." etc) and it also encodes how the name/value fields -should be interpreted. Currently the type indicates whether the value -is stored inline (in which case the value field contains the xattr value), -or if it is stored out of line (in which case the value field stores a -reference to where the actual value is stored). This allows large values -to be stored out of line improving scanning and lookup performance and it -also allows values to be de-duplicated, the value being stored once, and -all other occurrences holding an out of line reference to that value. - -The xattr lists are packed into compressed 8K metadata blocks. -To reduce overhead in inodes, rather than storing the on-disk -location of the xattr list inside each inode, a 32-bit xattr id -is stored. This xattr id is mapped into the location of the xattr -list using a second xattr id lookup table. - -4. TODOS AND OUTSTANDING ISSUES -------------------------------- - -4.1 Todo list -------------- - -Implement ACL support. - -4.2 Squashfs internal cache ---------------------------- - -Blocks in Squashfs are compressed. To avoid repeatedly decompressing -recently accessed data Squashfs uses two small metadata and fragment caches. - -The cache is not used for file datablocks, these are decompressed and cached in -the page-cache in the normal way. The cache is used to temporarily cache -fragment and metadata blocks which have been read as a result of a metadata -(i.e. inode or directory) or fragment access. Because metadata and fragments -are packed together into blocks (to gain greater compression) the read of a -particular piece of metadata or fragment will retrieve other metadata/fragments -which have been packed with it, these because of locality-of-reference may be -read in the near future. Temporarily caching them ensures they are available -for near future access without requiring an additional read and decompress. - -In the future this internal cache may be replaced with an implementation which -uses the kernel page cache. Because the page cache operates on page sized -units this may introduce additional complexity in terms of locking and -associated race conditions. -- cgit v1.2.3 From 86beb976700b26576fe522a94a0b3a4e3d5ce424 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:24 +0100 Subject: docs: filesystems: convert sysfs.txt to ReST - Add a SPDX header; - Add a document title; - Adjust document and section titles; - use :field: markup; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/5c480dcb467315b5df6e25372a65e473b585c36d.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/sysfs.rst | 418 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/sysfs.txt | 408 ----------------------------------- 3 files changed, 419 insertions(+), 408 deletions(-) create mode 100644 Documentation/filesystems/sysfs.rst delete mode 100644 Documentation/filesystems/sysfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 97a5f65ae509..bafe92c72433 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -87,5 +87,6 @@ Documentation for filesystem implementations. relay romfs squashfs + sysfs virtiofs vfat diff --git a/Documentation/filesystems/sysfs.rst b/Documentation/filesystems/sysfs.rst new file mode 100644 index 000000000000..290891c3fecb --- /dev/null +++ b/Documentation/filesystems/sysfs.rst @@ -0,0 +1,418 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================================================== +sysfs - _The_ filesystem for exporting kernel objects +===================================================== + +Patrick Mochel + +Mike Murphy + +:Revised: 16 August 2011 +:Original: 10 January 2003 + + +What it is: +~~~~~~~~~~~ + +sysfs is a ram-based filesystem initially based on ramfs. It provides +a means to export kernel data structures, their attributes, and the +linkages between them to userspace. + +sysfs is tied inherently to the kobject infrastructure. Please read +Documentation/kobject.txt for more information concerning the kobject +interface. + + +Using sysfs +~~~~~~~~~~~ + +sysfs is always compiled in if CONFIG_SYSFS is defined. You can access +it by doing:: + + mount -t sysfs sysfs /sys + + +Directory Creation +~~~~~~~~~~~~~~~~~~ + +For every kobject that is registered with the system, a directory is +created for it in sysfs. That directory is created as a subdirectory +of the kobject's parent, expressing internal object hierarchies to +userspace. Top-level directories in sysfs represent the common +ancestors of object hierarchies; i.e. the subsystems the objects +belong to. + +Sysfs internally stores a pointer to the kobject that implements a +directory in the kernfs_node object associated with the directory. In +the past this kobject pointer has been used by sysfs to do reference +counting directly on the kobject whenever the file is opened or closed. +With the current sysfs implementation the kobject reference count is +only modified directly by the function sysfs_schedule_callback(). + + +Attributes +~~~~~~~~~~ + +Attributes can be exported for kobjects in the form of regular files in +the filesystem. Sysfs forwards file I/O operations to methods defined +for the attributes, providing a means to read and write kernel +attributes. + +Attributes should be ASCII text files, preferably with only one value +per file. It is noted that it may not be efficient to contain only one +value per file, so it is socially acceptable to express an array of +values of the same type. + +Mixing types, expressing multiple lines of data, and doing fancy +formatting of data is heavily frowned upon. Doing these things may get +you publicly humiliated and your code rewritten without notice. + + +An attribute definition is simply:: + + struct attribute { + char * name; + struct module *owner; + umode_t mode; + }; + + + int sysfs_create_file(struct kobject * kobj, const struct attribute * attr); + void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr); + + +A bare attribute contains no means to read or write the value of the +attribute. Subsystems are encouraged to define their own attribute +structure and wrapper functions for adding and removing attributes for +a specific object type. + +For example, the driver model defines struct device_attribute like:: + + struct device_attribute { + struct attribute attr; + ssize_t (*show)(struct device *dev, struct device_attribute *attr, + char *buf); + ssize_t (*store)(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); + }; + + int device_create_file(struct device *, const struct device_attribute *); + void device_remove_file(struct device *, const struct device_attribute *); + +It also defines this helper for defining device attributes:: + + #define DEVICE_ATTR(_name, _mode, _show, _store) \ + struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store) + +For example, declaring:: + + static DEVICE_ATTR(foo, S_IWUSR | S_IRUGO, show_foo, store_foo); + +is equivalent to doing:: + + static struct device_attribute dev_attr_foo = { + .attr = { + .name = "foo", + .mode = S_IWUSR | S_IRUGO, + }, + .show = show_foo, + .store = store_foo, + }; + +Note as stated in include/linux/kernel.h "OTHER_WRITABLE? Generally +considered a bad idea." so trying to set a sysfs file writable for +everyone will fail reverting to RO mode for "Others". + +For the common cases sysfs.h provides convenience macros to make +defining attributes easier as well as making code more concise and +readable. The above case could be shortened to: + +static struct device_attribute dev_attr_foo = __ATTR_RW(foo); + +the list of helpers available to define your wrapper function is: + +__ATTR_RO(name): + assumes default name_show and mode 0444 +__ATTR_WO(name): + assumes a name_store only and is restricted to mode + 0200 that is root write access only. +__ATTR_RO_MODE(name, mode): + fore more restrictive RO access currently + only use case is the EFI System Resource Table + (see drivers/firmware/efi/esrt.c) +__ATTR_RW(name): + assumes default name_show, name_store and setting + mode to 0644. +__ATTR_NULL: + which sets the name to NULL and is used as end of list + indicator (see: kernel/workqueue.c) + +Subsystem-Specific Callbacks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When a subsystem defines a new attribute type, it must implement a +set of sysfs operations for forwarding read and write calls to the +show and store methods of the attribute owners:: + + struct sysfs_ops { + ssize_t (*show)(struct kobject *, struct attribute *, char *); + ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t); + }; + +[ Subsystems should have already defined a struct kobj_type as a +descriptor for this type, which is where the sysfs_ops pointer is +stored. See the kobject documentation for more information. ] + +When a file is read or written, sysfs calls the appropriate method +for the type. The method then translates the generic struct kobject +and struct attribute pointers to the appropriate pointer types, and +calls the associated methods. + + +To illustrate:: + + #define to_dev(obj) container_of(obj, struct device, kobj) + #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr) + + static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) + { + struct device_attribute *dev_attr = to_dev_attr(attr); + struct device *dev = to_dev(kobj); + ssize_t ret = -EIO; + + if (dev_attr->show) + ret = dev_attr->show(dev, dev_attr, buf); + if (ret >= (ssize_t)PAGE_SIZE) { + printk("dev_attr_show: %pS returned bad count\n", + dev_attr->show); + } + return ret; + } + + + +Reading/Writing Attribute Data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To read or write attributes, show() or store() methods must be +specified when declaring the attribute. The method types should be as +simple as those defined for device attributes:: + + ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf); + ssize_t (*store)(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); + +IOW, they should take only an object, an attribute, and a buffer as parameters. + + +sysfs allocates a buffer of size (PAGE_SIZE) and passes it to the +method. Sysfs will call the method exactly once for each read or +write. This forces the following behavior on the method +implementations: + +- On read(2), the show() method should fill the entire buffer. + Recall that an attribute should only be exporting one value, or an + array of similar values, so this shouldn't be that expensive. + + This allows userspace to do partial reads and forward seeks + arbitrarily over the entire file at will. If userspace seeks back to + zero or does a pread(2) with an offset of '0' the show() method will + be called again, rearmed, to fill the buffer. + +- On write(2), sysfs expects the entire buffer to be passed during the + first write. Sysfs then passes the entire buffer to the store() method. + A terminating null is added after the data on stores. This makes + functions like sysfs_streq() safe to use. + + When writing sysfs files, userspace processes should first read the + entire file, modify the values it wishes to change, then write the + entire buffer back. + + Attribute method implementations should operate on an identical + buffer when reading and writing values. + +Other notes: + +- Writing causes the show() method to be rearmed regardless of current + file position. + +- The buffer will always be PAGE_SIZE bytes in length. On i386, this + is 4096. + +- show() methods should return the number of bytes printed into the + buffer. This is the return value of scnprintf(). + +- show() must not use snprintf() when formatting the value to be + returned to user space. If you can guarantee that an overflow + will never happen you can use sprintf() otherwise you must use + scnprintf(). + +- store() should return the number of bytes used from the buffer. If the + entire buffer has been used, just return the count argument. + +- show() or store() can always return errors. If a bad value comes + through, be sure to return an error. + +- The object passed to the methods will be pinned in memory via sysfs + referencing counting its embedded object. However, the physical + entity (e.g. device) the object represents may not be present. Be + sure to have a way to check this, if necessary. + + +A very simple (and naive) implementation of a device attribute is:: + + static ssize_t show_name(struct device *dev, struct device_attribute *attr, + char *buf) + { + return scnprintf(buf, PAGE_SIZE, "%s\n", dev->name); + } + + static ssize_t store_name(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) + { + snprintf(dev->name, sizeof(dev->name), "%.*s", + (int)min(count, sizeof(dev->name) - 1), buf); + return count; + } + + static DEVICE_ATTR(name, S_IRUGO, show_name, store_name); + + +(Note that the real implementation doesn't allow userspace to set the +name for a device.) + + +Top Level Directory Layout +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The sysfs directory arrangement exposes the relationship of kernel +data structures. + +The top level sysfs directory looks like:: + + block/ + bus/ + class/ + dev/ + devices/ + firmware/ + net/ + fs/ + +devices/ contains a filesystem representation of the device tree. It maps +directly to the internal kernel device tree, which is a hierarchy of +struct device. + +bus/ contains flat directory layout of the various bus types in the +kernel. Each bus's directory contains two subdirectories:: + + devices/ + drivers/ + +devices/ contains symlinks for each device discovered in the system +that point to the device's directory under root/. + +drivers/ contains a directory for each device driver that is loaded +for devices on that particular bus (this assumes that drivers do not +span multiple bus types). + +fs/ contains a directory for some filesystems. Currently each +filesystem wanting to export attributes must create its own hierarchy +below fs/ (see ./fuse.txt for an example). + +dev/ contains two directories char/ and block/. Inside these two +directories there are symlinks named :. These symlinks +point to the sysfs directory for the given device. /sys/dev provides a +quick way to lookup the sysfs interface for a device from the result of +a stat(2) operation. + +More information can driver-model specific features can be found in +Documentation/driver-api/driver-model/. + + +TODO: Finish this section. + + +Current Interfaces +~~~~~~~~~~~~~~~~~~ + +The following interface layers currently exist in sysfs: + + +devices (include/linux/device.h) +-------------------------------- +Structure:: + + struct device_attribute { + struct attribute attr; + ssize_t (*show)(struct device *dev, struct device_attribute *attr, + char *buf); + ssize_t (*store)(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); + }; + +Declaring:: + + DEVICE_ATTR(_name, _mode, _show, _store); + +Creation/Removal:: + + int device_create_file(struct device *dev, const struct device_attribute * attr); + void device_remove_file(struct device *dev, const struct device_attribute * attr); + + +bus drivers (include/linux/device.h) +------------------------------------ +Structure:: + + struct bus_attribute { + struct attribute attr; + ssize_t (*show)(struct bus_type *, char * buf); + ssize_t (*store)(struct bus_type *, const char * buf, size_t count); + }; + +Declaring:: + + static BUS_ATTR_RW(name); + static BUS_ATTR_RO(name); + static BUS_ATTR_WO(name); + +Creation/Removal:: + + int bus_create_file(struct bus_type *, struct bus_attribute *); + void bus_remove_file(struct bus_type *, struct bus_attribute *); + + +device drivers (include/linux/device.h) +--------------------------------------- + +Structure:: + + struct driver_attribute { + struct attribute attr; + ssize_t (*show)(struct device_driver *, char * buf); + ssize_t (*store)(struct device_driver *, const char * buf, + size_t count); + }; + +Declaring:: + + DRIVER_ATTR_RO(_name) + DRIVER_ATTR_RW(_name) + +Creation/Removal:: + + int driver_create_file(struct device_driver *, const struct driver_attribute *); + void driver_remove_file(struct device_driver *, const struct driver_attribute *); + + +Documentation +~~~~~~~~~~~~~ + +The sysfs directory structure and the attributes in each directory define an +ABI between the kernel and user space. As for any ABI, it is important that +this ABI is stable and properly documented. All new sysfs attributes must be +documented in Documentation/ABI. See also Documentation/ABI/README for more +information. diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt deleted file mode 100644 index ddf15b1b0d5a..000000000000 --- a/Documentation/filesystems/sysfs.txt +++ /dev/null @@ -1,408 +0,0 @@ - -sysfs - _The_ filesystem for exporting kernel objects. - -Patrick Mochel -Mike Murphy - -Revised: 16 August 2011 -Original: 10 January 2003 - - -What it is: -~~~~~~~~~~~ - -sysfs is a ram-based filesystem initially based on ramfs. It provides -a means to export kernel data structures, their attributes, and the -linkages between them to userspace. - -sysfs is tied inherently to the kobject infrastructure. Please read -Documentation/kobject.txt for more information concerning the kobject -interface. - - -Using sysfs -~~~~~~~~~~~ - -sysfs is always compiled in if CONFIG_SYSFS is defined. You can access -it by doing: - - mount -t sysfs sysfs /sys - - -Directory Creation -~~~~~~~~~~~~~~~~~~ - -For every kobject that is registered with the system, a directory is -created for it in sysfs. That directory is created as a subdirectory -of the kobject's parent, expressing internal object hierarchies to -userspace. Top-level directories in sysfs represent the common -ancestors of object hierarchies; i.e. the subsystems the objects -belong to. - -Sysfs internally stores a pointer to the kobject that implements a -directory in the kernfs_node object associated with the directory. In -the past this kobject pointer has been used by sysfs to do reference -counting directly on the kobject whenever the file is opened or closed. -With the current sysfs implementation the kobject reference count is -only modified directly by the function sysfs_schedule_callback(). - - -Attributes -~~~~~~~~~~ - -Attributes can be exported for kobjects in the form of regular files in -the filesystem. Sysfs forwards file I/O operations to methods defined -for the attributes, providing a means to read and write kernel -attributes. - -Attributes should be ASCII text files, preferably with only one value -per file. It is noted that it may not be efficient to contain only one -value per file, so it is socially acceptable to express an array of -values of the same type. - -Mixing types, expressing multiple lines of data, and doing fancy -formatting of data is heavily frowned upon. Doing these things may get -you publicly humiliated and your code rewritten without notice. - - -An attribute definition is simply: - -struct attribute { - char * name; - struct module *owner; - umode_t mode; -}; - - -int sysfs_create_file(struct kobject * kobj, const struct attribute * attr); -void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr); - - -A bare attribute contains no means to read or write the value of the -attribute. Subsystems are encouraged to define their own attribute -structure and wrapper functions for adding and removing attributes for -a specific object type. - -For example, the driver model defines struct device_attribute like: - -struct device_attribute { - struct attribute attr; - ssize_t (*show)(struct device *dev, struct device_attribute *attr, - char *buf); - ssize_t (*store)(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count); -}; - -int device_create_file(struct device *, const struct device_attribute *); -void device_remove_file(struct device *, const struct device_attribute *); - -It also defines this helper for defining device attributes: - -#define DEVICE_ATTR(_name, _mode, _show, _store) \ -struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store) - -For example, declaring - -static DEVICE_ATTR(foo, S_IWUSR | S_IRUGO, show_foo, store_foo); - -is equivalent to doing: - -static struct device_attribute dev_attr_foo = { - .attr = { - .name = "foo", - .mode = S_IWUSR | S_IRUGO, - }, - .show = show_foo, - .store = store_foo, -}; - -Note as stated in include/linux/kernel.h "OTHER_WRITABLE? Generally -considered a bad idea." so trying to set a sysfs file writable for -everyone will fail reverting to RO mode for "Others". - -For the common cases sysfs.h provides convenience macros to make -defining attributes easier as well as making code more concise and -readable. The above case could be shortened to: - -static struct device_attribute dev_attr_foo = __ATTR_RW(foo); - -the list of helpers available to define your wrapper function is: -__ATTR_RO(name): assumes default name_show and mode 0444 -__ATTR_WO(name): assumes a name_store only and is restricted to mode - 0200 that is root write access only. -__ATTR_RO_MODE(name, mode): fore more restrictive RO access currently - only use case is the EFI System Resource Table - (see drivers/firmware/efi/esrt.c) -__ATTR_RW(name): assumes default name_show, name_store and setting - mode to 0644. -__ATTR_NULL: which sets the name to NULL and is used as end of list - indicator (see: kernel/workqueue.c) - -Subsystem-Specific Callbacks -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When a subsystem defines a new attribute type, it must implement a -set of sysfs operations for forwarding read and write calls to the -show and store methods of the attribute owners. - -struct sysfs_ops { - ssize_t (*show)(struct kobject *, struct attribute *, char *); - ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t); -}; - -[ Subsystems should have already defined a struct kobj_type as a -descriptor for this type, which is where the sysfs_ops pointer is -stored. See the kobject documentation for more information. ] - -When a file is read or written, sysfs calls the appropriate method -for the type. The method then translates the generic struct kobject -and struct attribute pointers to the appropriate pointer types, and -calls the associated methods. - - -To illustrate: - -#define to_dev(obj) container_of(obj, struct device, kobj) -#define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr) - -static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct device_attribute *dev_attr = to_dev_attr(attr); - struct device *dev = to_dev(kobj); - ssize_t ret = -EIO; - - if (dev_attr->show) - ret = dev_attr->show(dev, dev_attr, buf); - if (ret >= (ssize_t)PAGE_SIZE) { - printk("dev_attr_show: %pS returned bad count\n", - dev_attr->show); - } - return ret; -} - - - -Reading/Writing Attribute Data -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To read or write attributes, show() or store() methods must be -specified when declaring the attribute. The method types should be as -simple as those defined for device attributes: - -ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf); -ssize_t (*store)(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count); - -IOW, they should take only an object, an attribute, and a buffer as parameters. - - -sysfs allocates a buffer of size (PAGE_SIZE) and passes it to the -method. Sysfs will call the method exactly once for each read or -write. This forces the following behavior on the method -implementations: - -- On read(2), the show() method should fill the entire buffer. - Recall that an attribute should only be exporting one value, or an - array of similar values, so this shouldn't be that expensive. - - This allows userspace to do partial reads and forward seeks - arbitrarily over the entire file at will. If userspace seeks back to - zero or does a pread(2) with an offset of '0' the show() method will - be called again, rearmed, to fill the buffer. - -- On write(2), sysfs expects the entire buffer to be passed during the - first write. Sysfs then passes the entire buffer to the store() method. - A terminating null is added after the data on stores. This makes - functions like sysfs_streq() safe to use. - - When writing sysfs files, userspace processes should first read the - entire file, modify the values it wishes to change, then write the - entire buffer back. - - Attribute method implementations should operate on an identical - buffer when reading and writing values. - -Other notes: - -- Writing causes the show() method to be rearmed regardless of current - file position. - -- The buffer will always be PAGE_SIZE bytes in length. On i386, this - is 4096. - -- show() methods should return the number of bytes printed into the - buffer. This is the return value of scnprintf(). - -- show() must not use snprintf() when formatting the value to be - returned to user space. If you can guarantee that an overflow - will never happen you can use sprintf() otherwise you must use - scnprintf(). - -- store() should return the number of bytes used from the buffer. If the - entire buffer has been used, just return the count argument. - -- show() or store() can always return errors. If a bad value comes - through, be sure to return an error. - -- The object passed to the methods will be pinned in memory via sysfs - referencing counting its embedded object. However, the physical - entity (e.g. device) the object represents may not be present. Be - sure to have a way to check this, if necessary. - - -A very simple (and naive) implementation of a device attribute is: - -static ssize_t show_name(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return scnprintf(buf, PAGE_SIZE, "%s\n", dev->name); -} - -static ssize_t store_name(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - snprintf(dev->name, sizeof(dev->name), "%.*s", - (int)min(count, sizeof(dev->name) - 1), buf); - return count; -} - -static DEVICE_ATTR(name, S_IRUGO, show_name, store_name); - - -(Note that the real implementation doesn't allow userspace to set the -name for a device.) - - -Top Level Directory Layout -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The sysfs directory arrangement exposes the relationship of kernel -data structures. - -The top level sysfs directory looks like: - -block/ -bus/ -class/ -dev/ -devices/ -firmware/ -net/ -fs/ - -devices/ contains a filesystem representation of the device tree. It maps -directly to the internal kernel device tree, which is a hierarchy of -struct device. - -bus/ contains flat directory layout of the various bus types in the -kernel. Each bus's directory contains two subdirectories: - - devices/ - drivers/ - -devices/ contains symlinks for each device discovered in the system -that point to the device's directory under root/. - -drivers/ contains a directory for each device driver that is loaded -for devices on that particular bus (this assumes that drivers do not -span multiple bus types). - -fs/ contains a directory for some filesystems. Currently each -filesystem wanting to export attributes must create its own hierarchy -below fs/ (see ./fuse.txt for an example). - -dev/ contains two directories char/ and block/. Inside these two -directories there are symlinks named :. These symlinks -point to the sysfs directory for the given device. /sys/dev provides a -quick way to lookup the sysfs interface for a device from the result of -a stat(2) operation. - -More information can driver-model specific features can be found in -Documentation/driver-api/driver-model/. - - -TODO: Finish this section. - - -Current Interfaces -~~~~~~~~~~~~~~~~~~ - -The following interface layers currently exist in sysfs: - - -- devices (include/linux/device.h) ----------------------------------- -Structure: - -struct device_attribute { - struct attribute attr; - ssize_t (*show)(struct device *dev, struct device_attribute *attr, - char *buf); - ssize_t (*store)(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count); -}; - -Declaring: - -DEVICE_ATTR(_name, _mode, _show, _store); - -Creation/Removal: - -int device_create_file(struct device *dev, const struct device_attribute * attr); -void device_remove_file(struct device *dev, const struct device_attribute * attr); - - -- bus drivers (include/linux/device.h) --------------------------------------- -Structure: - -struct bus_attribute { - struct attribute attr; - ssize_t (*show)(struct bus_type *, char * buf); - ssize_t (*store)(struct bus_type *, const char * buf, size_t count); -}; - -Declaring: - -static BUS_ATTR_RW(name); -static BUS_ATTR_RO(name); -static BUS_ATTR_WO(name); - -Creation/Removal: - -int bus_create_file(struct bus_type *, struct bus_attribute *); -void bus_remove_file(struct bus_type *, struct bus_attribute *); - - -- device drivers (include/linux/device.h) ------------------------------------------ - -Structure: - -struct driver_attribute { - struct attribute attr; - ssize_t (*show)(struct device_driver *, char * buf); - ssize_t (*store)(struct device_driver *, const char * buf, - size_t count); -}; - -Declaring: - -DRIVER_ATTR_RO(_name) -DRIVER_ATTR_RW(_name) - -Creation/Removal: - -int driver_create_file(struct device_driver *, const struct driver_attribute *); -void driver_remove_file(struct device_driver *, const struct driver_attribute *); - - -Documentation -~~~~~~~~~~~~~ - -The sysfs directory structure and the attributes in each directory define an -ABI between the kernel and user space. As for any ABI, it is important that -this ABI is stable and properly documented. All new sysfs attributes must be -documented in Documentation/ABI. See also Documentation/ABI/README for more -information. -- cgit v1.2.3 From 826a613d3f81695022f324a5cb84fe73ec09e51d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:25 +0100 Subject: docs: filesystems: convert sysv-fs.txt to ReST - Add a SPDX header; - Add a document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/5b96a6efba95773af439ab25a7dbe4d0edf8c867.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/sysv-fs.rst | 264 ++++++++++++++++++++++++++++++++++ Documentation/filesystems/sysv-fs.txt | 197 ------------------------- 3 files changed, 265 insertions(+), 197 deletions(-) create mode 100644 Documentation/filesystems/sysv-fs.rst delete mode 100644 Documentation/filesystems/sysv-fs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index bafe92c72433..d583b8b35196 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -88,5 +88,6 @@ Documentation for filesystem implementations. romfs squashfs sysfs + sysv-fs virtiofs vfat diff --git a/Documentation/filesystems/sysv-fs.rst b/Documentation/filesystems/sysv-fs.rst new file mode 100644 index 000000000000..89e40911ad7c --- /dev/null +++ b/Documentation/filesystems/sysv-fs.rst @@ -0,0 +1,264 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================== +SystemV Filesystem +================== + +It implements all of + - Xenix FS, + - SystemV/386 FS, + - Coherent FS. + +To install: + +* Answer the 'System V and Coherent filesystem support' question with 'y' + when configuring the kernel. +* To mount a disk or a partition, use:: + + mount [-r] -t sysv device mountpoint + + The file system type names:: + + -t sysv + -t xenix + -t coherent + + may be used interchangeably, but the last two will eventually disappear. + +Bugs in the present implementation: + +- Coherent FS: + + - The "free list interleave" n:m is currently ignored. + - Only file systems with no filesystem name and no pack name are recognized. + (See Coherent "man mkfs" for a description of these features.) + +- SystemV Release 2 FS: + + The superblock is only searched in the blocks 9, 15, 18, which + corresponds to the beginning of track 1 on floppy disks. No support + for this FS on hard disk yet. + + +These filesystems are rather similar. Here is a comparison with Minix FS: + +* Linux fdisk reports on partitions + + - Minix FS 0x81 Linux/Minix + - Xenix FS ?? + - SystemV FS ?? + - Coherent FS 0x08 AIX bootable + +* Size of a block or zone (data allocation unit on disk) + + - Minix FS 1024 + - Xenix FS 1024 (also 512 ??) + - SystemV FS 1024 (also 512 and 2048) + - Coherent FS 512 + +* General layout: all have one boot block, one super block and + separate areas for inodes and for directories/data. + On SystemV Release 2 FS (e.g. Microport) the first track is reserved and + all the block numbers (including the super block) are offset by one track. + +* Byte ordering of "short" (16 bit entities) on disk: + + - Minix FS little endian 0 1 + - Xenix FS little endian 0 1 + - SystemV FS little endian 0 1 + - Coherent FS little endian 0 1 + + Of course, this affects only the file system, not the data of files on it! + +* Byte ordering of "long" (32 bit entities) on disk: + + - Minix FS little endian 0 1 2 3 + - Xenix FS little endian 0 1 2 3 + - SystemV FS little endian 0 1 2 3 + - Coherent FS PDP-11 2 3 0 1 + + Of course, this affects only the file system, not the data of files on it! + +* Inode on disk: "short", 0 means non-existent, the root dir ino is: + + ================================= == + Minix FS 1 + Xenix FS, SystemV FS, Coherent FS 2 + ================================= == + +* Maximum number of hard links to a file: + + =========== ========= + Minix FS 250 + Xenix FS ?? + SystemV FS ?? + Coherent FS >=10000 + =========== ========= + +* Free inode management: + + - Minix FS + a bitmap + - Xenix FS, SystemV FS, Coherent FS + There is a cache of a certain number of free inodes in the super-block. + When it is exhausted, new free inodes are found using a linear search. + +* Free block management: + + - Minix FS + a bitmap + - Xenix FS, SystemV FS, Coherent FS + Free blocks are organized in a "free list". Maybe a misleading term, + since it is not true that every free block contains a pointer to + the next free block. Rather, the free blocks are organized in chunks + of limited size, and every now and then a free block contains pointers + to the free blocks pertaining to the next chunk; the first of these + contains pointers and so on. The list terminates with a "block number" + 0 on Xenix FS and SystemV FS, with a block zeroed out on Coherent FS. + +* Super-block location: + + =========== ========================== + Minix FS block 1 = bytes 1024..2047 + Xenix FS block 1 = bytes 1024..2047 + SystemV FS bytes 512..1023 + Coherent FS block 1 = bytes 512..1023 + =========== ========================== + +* Super-block layout: + + - Minix FS:: + + unsigned short s_ninodes; + unsigned short s_nzones; + unsigned short s_imap_blocks; + unsigned short s_zmap_blocks; + unsigned short s_firstdatazone; + unsigned short s_log_zone_size; + unsigned long s_max_size; + unsigned short s_magic; + + - Xenix FS, SystemV FS, Coherent FS:: + + unsigned short s_firstdatazone; + unsigned long s_nzones; + unsigned short s_fzone_count; + unsigned long s_fzones[NICFREE]; + unsigned short s_finode_count; + unsigned short s_finodes[NICINOD]; + char s_flock; + char s_ilock; + char s_modified; + char s_rdonly; + unsigned long s_time; + short s_dinfo[4]; -- SystemV FS only + unsigned long s_free_zones; + unsigned short s_free_inodes; + short s_dinfo[4]; -- Xenix FS only + unsigned short s_interleave_m,s_interleave_n; -- Coherent FS only + char s_fname[6]; + char s_fpack[6]; + + then they differ considerably: + + Xenix FS:: + + char s_clean; + char s_fill[371]; + long s_magic; + long s_type; + + SystemV FS:: + + long s_fill[12 or 14]; + long s_state; + long s_magic; + long s_type; + + Coherent FS:: + + unsigned long s_unique; + + Note that Coherent FS has no magic. + +* Inode layout: + + - Minix FS:: + + unsigned short i_mode; + unsigned short i_uid; + unsigned long i_size; + unsigned long i_time; + unsigned char i_gid; + unsigned char i_nlinks; + unsigned short i_zone[7+1+1]; + + - Xenix FS, SystemV FS, Coherent FS:: + + unsigned short i_mode; + unsigned short i_nlink; + unsigned short i_uid; + unsigned short i_gid; + unsigned long i_size; + unsigned char i_zone[3*(10+1+1+1)]; + unsigned long i_atime; + unsigned long i_mtime; + unsigned long i_ctime; + + +* Regular file data blocks are organized as + + - Minix FS: + + - 7 direct blocks + - 1 indirect block (pointers to blocks) + - 1 double-indirect block (pointer to pointers to blocks) + + - Xenix FS, SystemV FS, Coherent FS: + + - 10 direct blocks + - 1 indirect block (pointers to blocks) + - 1 double-indirect block (pointer to pointers to blocks) + - 1 triple-indirect block (pointer to pointers to pointers to blocks) + + + =========== ========== ================ + Inode size inodes per block + =========== ========== ================ + Minix FS 32 32 + Xenix FS 64 16 + SystemV FS 64 16 + Coherent FS 64 8 + =========== ========== ================ + +* Directory entry on disk + + - Minix FS:: + + unsigned short inode; + char name[14/30]; + + - Xenix FS, SystemV FS, Coherent FS:: + + unsigned short inode; + char name[14]; + + =========== ============== ===================== + Dir entry size dir entries per block + =========== ============== ===================== + Minix FS 16/32 64/32 + Xenix FS 16 64 + SystemV FS 16 64 + Coherent FS 16 32 + =========== ============== ===================== + +* How to implement symbolic links such that the host fsck doesn't scream: + + - Minix FS normal + - Xenix FS kludge: as regular files with chmod 1000 + - SystemV FS ?? + - Coherent FS kludge: as regular files with chmod 1000 + + +Notation: We often speak of a "block" but mean a zone (the allocation unit) +and not the disk driver's notion of "block". diff --git a/Documentation/filesystems/sysv-fs.txt b/Documentation/filesystems/sysv-fs.txt deleted file mode 100644 index 253b50d1328e..000000000000 --- a/Documentation/filesystems/sysv-fs.txt +++ /dev/null @@ -1,197 +0,0 @@ -It implements all of - - Xenix FS, - - SystemV/386 FS, - - Coherent FS. - -To install: -* Answer the 'System V and Coherent filesystem support' question with 'y' - when configuring the kernel. -* To mount a disk or a partition, use - mount [-r] -t sysv device mountpoint - The file system type names - -t sysv - -t xenix - -t coherent - may be used interchangeably, but the last two will eventually disappear. - -Bugs in the present implementation: -- Coherent FS: - - The "free list interleave" n:m is currently ignored. - - Only file systems with no filesystem name and no pack name are recognized. - (See Coherent "man mkfs" for a description of these features.) -- SystemV Release 2 FS: - The superblock is only searched in the blocks 9, 15, 18, which - corresponds to the beginning of track 1 on floppy disks. No support - for this FS on hard disk yet. - - -These filesystems are rather similar. Here is a comparison with Minix FS: - -* Linux fdisk reports on partitions - - Minix FS 0x81 Linux/Minix - - Xenix FS ?? - - SystemV FS ?? - - Coherent FS 0x08 AIX bootable - -* Size of a block or zone (data allocation unit on disk) - - Minix FS 1024 - - Xenix FS 1024 (also 512 ??) - - SystemV FS 1024 (also 512 and 2048) - - Coherent FS 512 - -* General layout: all have one boot block, one super block and - separate areas for inodes and for directories/data. - On SystemV Release 2 FS (e.g. Microport) the first track is reserved and - all the block numbers (including the super block) are offset by one track. - -* Byte ordering of "short" (16 bit entities) on disk: - - Minix FS little endian 0 1 - - Xenix FS little endian 0 1 - - SystemV FS little endian 0 1 - - Coherent FS little endian 0 1 - Of course, this affects only the file system, not the data of files on it! - -* Byte ordering of "long" (32 bit entities) on disk: - - Minix FS little endian 0 1 2 3 - - Xenix FS little endian 0 1 2 3 - - SystemV FS little endian 0 1 2 3 - - Coherent FS PDP-11 2 3 0 1 - Of course, this affects only the file system, not the data of files on it! - -* Inode on disk: "short", 0 means non-existent, the root dir ino is: - - Minix FS 1 - - Xenix FS, SystemV FS, Coherent FS 2 - -* Maximum number of hard links to a file: - - Minix FS 250 - - Xenix FS ?? - - SystemV FS ?? - - Coherent FS >=10000 - -* Free inode management: - - Minix FS a bitmap - - Xenix FS, SystemV FS, Coherent FS - There is a cache of a certain number of free inodes in the super-block. - When it is exhausted, new free inodes are found using a linear search. - -* Free block management: - - Minix FS a bitmap - - Xenix FS, SystemV FS, Coherent FS - Free blocks are organized in a "free list". Maybe a misleading term, - since it is not true that every free block contains a pointer to - the next free block. Rather, the free blocks are organized in chunks - of limited size, and every now and then a free block contains pointers - to the free blocks pertaining to the next chunk; the first of these - contains pointers and so on. The list terminates with a "block number" - 0 on Xenix FS and SystemV FS, with a block zeroed out on Coherent FS. - -* Super-block location: - - Minix FS block 1 = bytes 1024..2047 - - Xenix FS block 1 = bytes 1024..2047 - - SystemV FS bytes 512..1023 - - Coherent FS block 1 = bytes 512..1023 - -* Super-block layout: - - Minix FS - unsigned short s_ninodes; - unsigned short s_nzones; - unsigned short s_imap_blocks; - unsigned short s_zmap_blocks; - unsigned short s_firstdatazone; - unsigned short s_log_zone_size; - unsigned long s_max_size; - unsigned short s_magic; - - Xenix FS, SystemV FS, Coherent FS - unsigned short s_firstdatazone; - unsigned long s_nzones; - unsigned short s_fzone_count; - unsigned long s_fzones[NICFREE]; - unsigned short s_finode_count; - unsigned short s_finodes[NICINOD]; - char s_flock; - char s_ilock; - char s_modified; - char s_rdonly; - unsigned long s_time; - short s_dinfo[4]; -- SystemV FS only - unsigned long s_free_zones; - unsigned short s_free_inodes; - short s_dinfo[4]; -- Xenix FS only - unsigned short s_interleave_m,s_interleave_n; -- Coherent FS only - char s_fname[6]; - char s_fpack[6]; - then they differ considerably: - Xenix FS - char s_clean; - char s_fill[371]; - long s_magic; - long s_type; - SystemV FS - long s_fill[12 or 14]; - long s_state; - long s_magic; - long s_type; - Coherent FS - unsigned long s_unique; - Note that Coherent FS has no magic. - -* Inode layout: - - Minix FS - unsigned short i_mode; - unsigned short i_uid; - unsigned long i_size; - unsigned long i_time; - unsigned char i_gid; - unsigned char i_nlinks; - unsigned short i_zone[7+1+1]; - - Xenix FS, SystemV FS, Coherent FS - unsigned short i_mode; - unsigned short i_nlink; - unsigned short i_uid; - unsigned short i_gid; - unsigned long i_size; - unsigned char i_zone[3*(10+1+1+1)]; - unsigned long i_atime; - unsigned long i_mtime; - unsigned long i_ctime; - -* Regular file data blocks are organized as - - Minix FS - 7 direct blocks - 1 indirect block (pointers to blocks) - 1 double-indirect block (pointer to pointers to blocks) - - Xenix FS, SystemV FS, Coherent FS - 10 direct blocks - 1 indirect block (pointers to blocks) - 1 double-indirect block (pointer to pointers to blocks) - 1 triple-indirect block (pointer to pointers to pointers to blocks) - -* Inode size, inodes per block - - Minix FS 32 32 - - Xenix FS 64 16 - - SystemV FS 64 16 - - Coherent FS 64 8 - -* Directory entry on disk - - Minix FS - unsigned short inode; - char name[14/30]; - - Xenix FS, SystemV FS, Coherent FS - unsigned short inode; - char name[14]; - -* Dir entry size, dir entries per block - - Minix FS 16/32 64/32 - - Xenix FS 16 64 - - SystemV FS 16 64 - - Coherent FS 16 32 - -* How to implement symbolic links such that the host fsck doesn't scream: - - Minix FS normal - - Xenix FS kludge: as regular files with chmod 1000 - - SystemV FS ?? - - Coherent FS kludge: as regular files with chmod 1000 - - -Notation: We often speak of a "block" but mean a zone (the allocation unit) -and not the disk driver's notion of "block". -- cgit v1.2.3 From 7e7cd458b8105b02e69e3af2ef4cd186326d7f84 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:26 +0100 Subject: docs: filesystems: convert tmpfs.txt to ReST - Add a SPDX header; - Add a document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Use :field: markup; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/30397a47a78ca59760fbc0fc5f50c5f1002d487a.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/tmpfs.rst | 163 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/tmpfs.txt | 149 -------------------------------- 3 files changed, 164 insertions(+), 149 deletions(-) create mode 100644 Documentation/filesystems/tmpfs.rst delete mode 100644 Documentation/filesystems/tmpfs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index d583b8b35196..27d37e7712da 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -89,5 +89,6 @@ Documentation for filesystem implementations. squashfs sysfs sysv-fs + tmpfs virtiofs vfat diff --git a/Documentation/filesystems/tmpfs.rst b/Documentation/filesystems/tmpfs.rst new file mode 100644 index 000000000000..4e95929301a5 --- /dev/null +++ b/Documentation/filesystems/tmpfs.rst @@ -0,0 +1,163 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===== +Tmpfs +===== + +Tmpfs is a file system which keeps all files in virtual memory. + + +Everything in tmpfs is temporary in the sense that no files will be +created on your hard drive. If you unmount a tmpfs instance, +everything stored therein is lost. + +tmpfs puts everything into the kernel internal caches and grows and +shrinks to accommodate the files it contains and is able to swap +unneeded pages out to swap space. It has maximum size limits which can +be adjusted on the fly via 'mount -o remount ...' + +If you compare it to ramfs (which was the template to create tmpfs) +you gain swapping and limit checking. Another similar thing is the RAM +disk (/dev/ram*), which simulates a fixed size hard disk in physical +RAM, where you have to create an ordinary filesystem on top. Ramdisks +cannot swap and you do not have the possibility to resize them. + +Since tmpfs lives completely in the page cache and on swap, all tmpfs +pages will be shown as "Shmem" in /proc/meminfo and "Shared" in +free(1). Notice that these counters also include shared memory +(shmem, see ipcs(1)). The most reliable way to get the count is +using df(1) and du(1). + +tmpfs has the following uses: + +1) There is always a kernel internal mount which you will not see at + all. This is used for shared anonymous mappings and SYSV shared + memory. + + This mount does not depend on CONFIG_TMPFS. If CONFIG_TMPFS is not + set, the user visible part of tmpfs is not build. But the internal + mechanisms are always present. + +2) glibc 2.2 and above expects tmpfs to be mounted at /dev/shm for + POSIX shared memory (shm_open, shm_unlink). Adding the following + line to /etc/fstab should take care of this:: + + tmpfs /dev/shm tmpfs defaults 0 0 + + Remember to create the directory that you intend to mount tmpfs on + if necessary. + + This mount is _not_ needed for SYSV shared memory. The internal + mount is used for that. (In the 2.3 kernel versions it was + necessary to mount the predecessor of tmpfs (shm fs) to use SYSV + shared memory) + +3) Some people (including me) find it very convenient to mount it + e.g. on /tmp and /var/tmp and have a big swap partition. And now + loop mounts of tmpfs files do work, so mkinitrd shipped by most + distributions should succeed with a tmpfs /tmp. + +4) And probably a lot more I do not know about :-) + + +tmpfs has three mount options for sizing: + +========= ============================================================ +size The limit of allocated bytes for this tmpfs instance. The + default is half of your physical RAM without swap. If you + oversize your tmpfs instances the machine will deadlock + since the OOM handler will not be able to free that memory. +nr_blocks The same as size, but in blocks of PAGE_SIZE. +nr_inodes The maximum number of inodes for this instance. The default + is half of the number of your physical RAM pages, or (on a + machine with highmem) the number of lowmem RAM pages, + whichever is the lower. +========= ============================================================ + +These parameters accept a suffix k, m or g for kilo, mega and giga and +can be changed on remount. The size parameter also accepts a suffix % +to limit this tmpfs instance to that percentage of your physical RAM: +the default, when neither size nor nr_blocks is specified, is size=50% + +If nr_blocks=0 (or size=0), blocks will not be limited in that instance; +if nr_inodes=0, inodes will not be limited. It is generally unwise to +mount with such options, since it allows any user with write access to +use up all the memory on the machine; but enhances the scalability of +that instance in a system with many cpus making intensive use of it. + + +tmpfs has a mount option to set the NUMA memory allocation policy for +all files in that instance (if CONFIG_NUMA is enabled) - which can be +adjusted on the fly via 'mount -o remount ...' + +======================== ============================================== +mpol=default use the process allocation policy + (see set_mempolicy(2)) +mpol=prefer:Node prefers to allocate memory from the given Node +mpol=bind:NodeList allocates memory only from nodes in NodeList +mpol=interleave prefers to allocate from each node in turn +mpol=interleave:NodeList allocates from each node of NodeList in turn +mpol=local prefers to allocate memory from the local node +======================== ============================================== + +NodeList format is a comma-separated list of decimal numbers and ranges, +a range being two hyphen-separated decimal numbers, the smallest and +largest node numbers in the range. For example, mpol=bind:0-3,5,7,9-15 + +A memory policy with a valid NodeList will be saved, as specified, for +use at file creation time. When a task allocates a file in the file +system, the mount option memory policy will be applied with a NodeList, +if any, modified by the calling task's cpuset constraints +[See Documentation/admin-guide/cgroup-v1/cpusets.rst] and any optional flags, +listed below. If the resulting NodeLists is the empty set, the effective +memory policy for the file will revert to "default" policy. + +NUMA memory allocation policies have optional flags that can be used in +conjunction with their modes. These optional flags can be specified +when tmpfs is mounted by appending them to the mode before the NodeList. +See Documentation/admin-guide/mm/numa_memory_policy.rst for a list of +all available memory allocation policy mode flags and their effect on +memory policy. + +:: + + =static is equivalent to MPOL_F_STATIC_NODES + =relative is equivalent to MPOL_F_RELATIVE_NODES + +For example, mpol=bind=static:NodeList, is the equivalent of an +allocation policy of MPOL_BIND | MPOL_F_STATIC_NODES. + +Note that trying to mount a tmpfs with an mpol option will fail if the +running kernel does not support NUMA; and will fail if its nodelist +specifies a node which is not online. If your system relies on that +tmpfs being mounted, but from time to time runs a kernel built without +NUMA capability (perhaps a safe recovery kernel), or with fewer nodes +online, then it is advisable to omit the mpol option from automatic +mount options. It can be added later, when the tmpfs is already mounted +on MountPoint, by 'mount -o remount,mpol=Policy:NodeList MountPoint'. + + +To specify the initial root directory you can use the following mount +options: + +==== ================================== +mode The permissions as an octal number +uid The user id +gid The group id +==== ================================== + +These options do not have any effect on remount. You can change these +parameters with chmod(1), chown(1) and chgrp(1) on a mounted filesystem. + + +So 'mount -t tmpfs -o size=10G,nr_inodes=10k,mode=700 tmpfs /mytmpfs' +will give you tmpfs instance on /mytmpfs which can allocate 10GB +RAM/SWAP in 10240 inodes and it is only accessible by root. + + +:Author: + Christoph Rohland , 1.12.01 +:Updated: + Hugh Dickins, 4 June 2007 +:Updated: + KOSAKI Motohiro, 16 Mar 2010 diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt deleted file mode 100644 index 5ecbc03e6b2f..000000000000 --- a/Documentation/filesystems/tmpfs.txt +++ /dev/null @@ -1,149 +0,0 @@ -Tmpfs is a file system which keeps all files in virtual memory. - - -Everything in tmpfs is temporary in the sense that no files will be -created on your hard drive. If you unmount a tmpfs instance, -everything stored therein is lost. - -tmpfs puts everything into the kernel internal caches and grows and -shrinks to accommodate the files it contains and is able to swap -unneeded pages out to swap space. It has maximum size limits which can -be adjusted on the fly via 'mount -o remount ...' - -If you compare it to ramfs (which was the template to create tmpfs) -you gain swapping and limit checking. Another similar thing is the RAM -disk (/dev/ram*), which simulates a fixed size hard disk in physical -RAM, where you have to create an ordinary filesystem on top. Ramdisks -cannot swap and you do not have the possibility to resize them. - -Since tmpfs lives completely in the page cache and on swap, all tmpfs -pages will be shown as "Shmem" in /proc/meminfo and "Shared" in -free(1). Notice that these counters also include shared memory -(shmem, see ipcs(1)). The most reliable way to get the count is -using df(1) and du(1). - -tmpfs has the following uses: - -1) There is always a kernel internal mount which you will not see at - all. This is used for shared anonymous mappings and SYSV shared - memory. - - This mount does not depend on CONFIG_TMPFS. If CONFIG_TMPFS is not - set, the user visible part of tmpfs is not build. But the internal - mechanisms are always present. - -2) glibc 2.2 and above expects tmpfs to be mounted at /dev/shm for - POSIX shared memory (shm_open, shm_unlink). Adding the following - line to /etc/fstab should take care of this: - - tmpfs /dev/shm tmpfs defaults 0 0 - - Remember to create the directory that you intend to mount tmpfs on - if necessary. - - This mount is _not_ needed for SYSV shared memory. The internal - mount is used for that. (In the 2.3 kernel versions it was - necessary to mount the predecessor of tmpfs (shm fs) to use SYSV - shared memory) - -3) Some people (including me) find it very convenient to mount it - e.g. on /tmp and /var/tmp and have a big swap partition. And now - loop mounts of tmpfs files do work, so mkinitrd shipped by most - distributions should succeed with a tmpfs /tmp. - -4) And probably a lot more I do not know about :-) - - -tmpfs has three mount options for sizing: - -size: The limit of allocated bytes for this tmpfs instance. The - default is half of your physical RAM without swap. If you - oversize your tmpfs instances the machine will deadlock - since the OOM handler will not be able to free that memory. -nr_blocks: The same as size, but in blocks of PAGE_SIZE. -nr_inodes: The maximum number of inodes for this instance. The default - is half of the number of your physical RAM pages, or (on a - machine with highmem) the number of lowmem RAM pages, - whichever is the lower. - -These parameters accept a suffix k, m or g for kilo, mega and giga and -can be changed on remount. The size parameter also accepts a suffix % -to limit this tmpfs instance to that percentage of your physical RAM: -the default, when neither size nor nr_blocks is specified, is size=50% - -If nr_blocks=0 (or size=0), blocks will not be limited in that instance; -if nr_inodes=0, inodes will not be limited. It is generally unwise to -mount with such options, since it allows any user with write access to -use up all the memory on the machine; but enhances the scalability of -that instance in a system with many cpus making intensive use of it. - - -tmpfs has a mount option to set the NUMA memory allocation policy for -all files in that instance (if CONFIG_NUMA is enabled) - which can be -adjusted on the fly via 'mount -o remount ...' - -mpol=default use the process allocation policy - (see set_mempolicy(2)) -mpol=prefer:Node prefers to allocate memory from the given Node -mpol=bind:NodeList allocates memory only from nodes in NodeList -mpol=interleave prefers to allocate from each node in turn -mpol=interleave:NodeList allocates from each node of NodeList in turn -mpol=local prefers to allocate memory from the local node - -NodeList format is a comma-separated list of decimal numbers and ranges, -a range being two hyphen-separated decimal numbers, the smallest and -largest node numbers in the range. For example, mpol=bind:0-3,5,7,9-15 - -A memory policy with a valid NodeList will be saved, as specified, for -use at file creation time. When a task allocates a file in the file -system, the mount option memory policy will be applied with a NodeList, -if any, modified by the calling task's cpuset constraints -[See Documentation/admin-guide/cgroup-v1/cpusets.rst] and any optional flags, listed -below. If the resulting NodeLists is the empty set, the effective memory -policy for the file will revert to "default" policy. - -NUMA memory allocation policies have optional flags that can be used in -conjunction with their modes. These optional flags can be specified -when tmpfs is mounted by appending them to the mode before the NodeList. -See Documentation/admin-guide/mm/numa_memory_policy.rst for a list of -all available memory allocation policy mode flags and their effect on -memory policy. - - =static is equivalent to MPOL_F_STATIC_NODES - =relative is equivalent to MPOL_F_RELATIVE_NODES - -For example, mpol=bind=static:NodeList, is the equivalent of an -allocation policy of MPOL_BIND | MPOL_F_STATIC_NODES. - -Note that trying to mount a tmpfs with an mpol option will fail if the -running kernel does not support NUMA; and will fail if its nodelist -specifies a node which is not online. If your system relies on that -tmpfs being mounted, but from time to time runs a kernel built without -NUMA capability (perhaps a safe recovery kernel), or with fewer nodes -online, then it is advisable to omit the mpol option from automatic -mount options. It can be added later, when the tmpfs is already mounted -on MountPoint, by 'mount -o remount,mpol=Policy:NodeList MountPoint'. - - -To specify the initial root directory you can use the following mount -options: - -mode: The permissions as an octal number -uid: The user id -gid: The group id - -These options do not have any effect on remount. You can change these -parameters with chmod(1), chown(1) and chgrp(1) on a mounted filesystem. - - -So 'mount -t tmpfs -o size=10G,nr_inodes=10k,mode=700 tmpfs /mytmpfs' -will give you tmpfs instance on /mytmpfs which can allocate 10GB -RAM/SWAP in 10240 inodes and it is only accessible by root. - - -Author: - Christoph Rohland , 1.12.01 -Updated: - Hugh Dickins, 4 June 2007 -Updated: - KOSAKI Motohiro, 16 Mar 2010 -- cgit v1.2.3 From 688f118e3139f81f813ba1896931cf8fad93430d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:27 +0100 Subject: docs: filesystems: convert ubifs-authentication.rst.txt to ReST - Add a SPDX header; - Mark some literals as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/0c36091b6660cd372f994bd98e1264491d766c22.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/ubifs-authentication.rst | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 27d37e7712da..bb14738df358 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -90,5 +90,6 @@ Documentation for filesystem implementations. sysfs sysv-fs tmpfs + ubifs-authentication.rst virtiofs vfat diff --git a/Documentation/filesystems/ubifs-authentication.rst b/Documentation/filesystems/ubifs-authentication.rst index 6a9584f6ff46..16efd729bf7c 100644 --- a/Documentation/filesystems/ubifs-authentication.rst +++ b/Documentation/filesystems/ubifs-authentication.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + :orphan: .. UBIFS Authentication @@ -92,11 +94,11 @@ UBIFS Index & Tree Node Cache ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Basic on-flash UBIFS entities are called *nodes*. UBIFS knows different types -of nodes. Eg. data nodes (`struct ubifs_data_node`) which store chunks of file -contents or inode nodes (`struct ubifs_ino_node`) which represent VFS inodes. -Almost all types of nodes share a common header (`ubifs_ch`) containing basic +of nodes. Eg. data nodes (``struct ubifs_data_node``) which store chunks of file +contents or inode nodes (``struct ubifs_ino_node``) which represent VFS inodes. +Almost all types of nodes share a common header (``ubifs_ch``) containing basic information like node type, node length, a sequence number, etc. (see -`fs/ubifs/ubifs-media.h`in kernel source). Exceptions are entries of the LPT +``fs/ubifs/ubifs-media.h`` in kernel source). Exceptions are entries of the LPT and some less important node types like padding nodes which are used to pad unusable content at the end of LEBs. -- cgit v1.2.3 From 38e56b4ec44139b5781d6ff13f1b422e4b38f0d4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:28 +0100 Subject: docs: filesystems: convert ubifs.txt to ReST - Add a SPDX header; - Add a document title; - Adjust section titles; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add table markups; - Add lists markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/9043dc2965cafc64e6a521e2317c00ecc8303bf6.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/ubifs.rst | 137 ++++++++++++++++++++++++++++++++++++ Documentation/filesystems/ubifs.txt | 126 --------------------------------- 3 files changed, 138 insertions(+), 126 deletions(-) create mode 100644 Documentation/filesystems/ubifs.rst delete mode 100644 Documentation/filesystems/ubifs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index bb14738df358..58d57c9bf922 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -90,6 +90,7 @@ Documentation for filesystem implementations. sysfs sysv-fs tmpfs + ubifs ubifs-authentication.rst virtiofs vfat diff --git a/Documentation/filesystems/ubifs.rst b/Documentation/filesystems/ubifs.rst new file mode 100644 index 000000000000..e6ee99762534 --- /dev/null +++ b/Documentation/filesystems/ubifs.rst @@ -0,0 +1,137 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +UBI File System +=============== + +Introduction +============ + +UBIFS file-system stands for UBI File System. UBI stands for "Unsorted +Block Images". UBIFS is a flash file system, which means it is designed +to work with flash devices. It is important to understand, that UBIFS +is completely different to any traditional file-system in Linux, like +Ext2, XFS, JFS, etc. UBIFS represents a separate class of file-systems +which work with MTD devices, not block devices. The other Linux +file-system of this class is JFFS2. + +To make it more clear, here is a small comparison of MTD devices and +block devices. + +1 MTD devices represent flash devices and they consist of eraseblocks of + rather large size, typically about 128KiB. Block devices consist of + small blocks, typically 512 bytes. +2 MTD devices support 3 main operations - read from some offset within an + eraseblock, write to some offset within an eraseblock, and erase a whole + eraseblock. Block devices support 2 main operations - read a whole + block and write a whole block. +3 The whole eraseblock has to be erased before it becomes possible to + re-write its contents. Blocks may be just re-written. +4 Eraseblocks become worn out after some number of erase cycles - + typically 100K-1G for SLC NAND and NOR flashes, and 1K-10K for MLC + NAND flashes. Blocks do not have the wear-out property. +5 Eraseblocks may become bad (only on NAND flashes) and software should + deal with this. Blocks on hard drives typically do not become bad, + because hardware has mechanisms to substitute bad blocks, at least in + modern LBA disks. + +It should be quite obvious why UBIFS is very different to traditional +file-systems. + +UBIFS works on top of UBI. UBI is a separate software layer which may be +found in drivers/mtd/ubi. UBI is basically a volume management and +wear-leveling layer. It provides so called UBI volumes which is a higher +level abstraction than a MTD device. The programming model of UBI devices +is very similar to MTD devices - they still consist of large eraseblocks, +they have read/write/erase operations, but UBI devices are devoid of +limitations like wear and bad blocks (items 4 and 5 in the above list). + +In a sense, UBIFS is a next generation of JFFS2 file-system, but it is +very different and incompatible to JFFS2. The following are the main +differences. + +* JFFS2 works on top of MTD devices, UBIFS depends on UBI and works on + top of UBI volumes. +* JFFS2 does not have on-media index and has to build it while mounting, + which requires full media scan. UBIFS maintains the FS indexing + information on the flash media and does not require full media scan, + so it mounts many times faster than JFFS2. +* JFFS2 is a write-through file-system, while UBIFS supports write-back, + which makes UBIFS much faster on writes. + +Similarly to JFFS2, UBIFS supports on-the-flight compression which makes +it possible to fit quite a lot of data to the flash. + +Similarly to JFFS2, UBIFS is tolerant of unclean reboots and power-cuts. +It does not need stuff like fsck.ext2. UBIFS automatically replays its +journal and recovers from crashes, ensuring that the on-flash data +structures are consistent. + +UBIFS scales logarithmically (most of the data structures it uses are +trees), so the mount time and memory consumption do not linearly depend +on the flash size, like in case of JFFS2. This is because UBIFS +maintains the FS index on the flash media. However, UBIFS depends on +UBI, which scales linearly. So overall UBI/UBIFS stack scales linearly. +Nevertheless, UBI/UBIFS scales considerably better than JFFS2. + +The authors of UBIFS believe, that it is possible to develop UBI2 which +would scale logarithmically as well. UBI2 would support the same API as UBI, +but it would be binary incompatible to UBI. So UBIFS would not need to be +changed to use UBI2 + + +Mount options +============= + +(*) == default. + +==================== ======================================================= +bulk_read read more in one go to take advantage of flash + media that read faster sequentially +no_bulk_read (*) do not bulk-read +no_chk_data_crc (*) skip checking of CRCs on data nodes in order to + improve read performance. Use this option only + if the flash media is highly reliable. The effect + of this option is that corruption of the contents + of a file can go unnoticed. +chk_data_crc do not skip checking CRCs on data nodes +compr=none override default compressor and set it to "none" +compr=lzo override default compressor and set it to "lzo" +compr=zlib override default compressor and set it to "zlib" +auth_key= specify the key used for authenticating the filesystem. + Passing this option makes authentication mandatory. + The passed key must be present in the kernel keyring + and must be of type 'logon' +auth_hash_name= The hash algorithm used for authentication. Used for + both hashing and for creating HMACs. Typical values + include "sha256" or "sha512" +==================== ======================================================= + + +Quick usage instructions +======================== + +The UBI volume to mount is specified using "ubiX_Y" or "ubiX:NAME" syntax, +where "X" is UBI device number, "Y" is UBI volume number, and "NAME" is +UBI volume name. + +Mount volume 0 on UBI device 0 to /mnt/ubifs:: + + $ mount -t ubifs ubi0_0 /mnt/ubifs + +Mount "rootfs" volume of UBI device 0 to /mnt/ubifs ("rootfs" is volume +name):: + + $ mount -t ubifs ubi0:rootfs /mnt/ubifs + +The following is an example of the kernel boot arguments to attach mtd0 +to UBI and mount volume "rootfs": +ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs + +References +========== + +UBIFS documentation and FAQ/HOWTO at the MTD web site: + +- http://www.linux-mtd.infradead.org/doc/ubifs.html +- http://www.linux-mtd.infradead.org/faq/ubifs.html diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt deleted file mode 100644 index acc80442a3bb..000000000000 --- a/Documentation/filesystems/ubifs.txt +++ /dev/null @@ -1,126 +0,0 @@ -Introduction -============= - -UBIFS file-system stands for UBI File System. UBI stands for "Unsorted -Block Images". UBIFS is a flash file system, which means it is designed -to work with flash devices. It is important to understand, that UBIFS -is completely different to any traditional file-system in Linux, like -Ext2, XFS, JFS, etc. UBIFS represents a separate class of file-systems -which work with MTD devices, not block devices. The other Linux -file-system of this class is JFFS2. - -To make it more clear, here is a small comparison of MTD devices and -block devices. - -1 MTD devices represent flash devices and they consist of eraseblocks of - rather large size, typically about 128KiB. Block devices consist of - small blocks, typically 512 bytes. -2 MTD devices support 3 main operations - read from some offset within an - eraseblock, write to some offset within an eraseblock, and erase a whole - eraseblock. Block devices support 2 main operations - read a whole - block and write a whole block. -3 The whole eraseblock has to be erased before it becomes possible to - re-write its contents. Blocks may be just re-written. -4 Eraseblocks become worn out after some number of erase cycles - - typically 100K-1G for SLC NAND and NOR flashes, and 1K-10K for MLC - NAND flashes. Blocks do not have the wear-out property. -5 Eraseblocks may become bad (only on NAND flashes) and software should - deal with this. Blocks on hard drives typically do not become bad, - because hardware has mechanisms to substitute bad blocks, at least in - modern LBA disks. - -It should be quite obvious why UBIFS is very different to traditional -file-systems. - -UBIFS works on top of UBI. UBI is a separate software layer which may be -found in drivers/mtd/ubi. UBI is basically a volume management and -wear-leveling layer. It provides so called UBI volumes which is a higher -level abstraction than a MTD device. The programming model of UBI devices -is very similar to MTD devices - they still consist of large eraseblocks, -they have read/write/erase operations, but UBI devices are devoid of -limitations like wear and bad blocks (items 4 and 5 in the above list). - -In a sense, UBIFS is a next generation of JFFS2 file-system, but it is -very different and incompatible to JFFS2. The following are the main -differences. - -* JFFS2 works on top of MTD devices, UBIFS depends on UBI and works on - top of UBI volumes. -* JFFS2 does not have on-media index and has to build it while mounting, - which requires full media scan. UBIFS maintains the FS indexing - information on the flash media and does not require full media scan, - so it mounts many times faster than JFFS2. -* JFFS2 is a write-through file-system, while UBIFS supports write-back, - which makes UBIFS much faster on writes. - -Similarly to JFFS2, UBIFS supports on-the-flight compression which makes -it possible to fit quite a lot of data to the flash. - -Similarly to JFFS2, UBIFS is tolerant of unclean reboots and power-cuts. -It does not need stuff like fsck.ext2. UBIFS automatically replays its -journal and recovers from crashes, ensuring that the on-flash data -structures are consistent. - -UBIFS scales logarithmically (most of the data structures it uses are -trees), so the mount time and memory consumption do not linearly depend -on the flash size, like in case of JFFS2. This is because UBIFS -maintains the FS index on the flash media. However, UBIFS depends on -UBI, which scales linearly. So overall UBI/UBIFS stack scales linearly. -Nevertheless, UBI/UBIFS scales considerably better than JFFS2. - -The authors of UBIFS believe, that it is possible to develop UBI2 which -would scale logarithmically as well. UBI2 would support the same API as UBI, -but it would be binary incompatible to UBI. So UBIFS would not need to be -changed to use UBI2 - - -Mount options -============= - -(*) == default. - -bulk_read read more in one go to take advantage of flash - media that read faster sequentially -no_bulk_read (*) do not bulk-read -no_chk_data_crc (*) skip checking of CRCs on data nodes in order to - improve read performance. Use this option only - if the flash media is highly reliable. The effect - of this option is that corruption of the contents - of a file can go unnoticed. -chk_data_crc do not skip checking CRCs on data nodes -compr=none override default compressor and set it to "none" -compr=lzo override default compressor and set it to "lzo" -compr=zlib override default compressor and set it to "zlib" -auth_key= specify the key used for authenticating the filesystem. - Passing this option makes authentication mandatory. - The passed key must be present in the kernel keyring - and must be of type 'logon' -auth_hash_name= The hash algorithm used for authentication. Used for - both hashing and for creating HMACs. Typical values - include "sha256" or "sha512" - - -Quick usage instructions -======================== - -The UBI volume to mount is specified using "ubiX_Y" or "ubiX:NAME" syntax, -where "X" is UBI device number, "Y" is UBI volume number, and "NAME" is -UBI volume name. - -Mount volume 0 on UBI device 0 to /mnt/ubifs: -$ mount -t ubifs ubi0_0 /mnt/ubifs - -Mount "rootfs" volume of UBI device 0 to /mnt/ubifs ("rootfs" is volume -name): -$ mount -t ubifs ubi0:rootfs /mnt/ubifs - -The following is an example of the kernel boot arguments to attach mtd0 -to UBI and mount volume "rootfs": -ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs - -References -========== - -UBIFS documentation and FAQ/HOWTO at the MTD web site: -http://www.linux-mtd.infradead.org/doc/ubifs.html -http://www.linux-mtd.infradead.org/faq/ubifs.html -- cgit v1.2.3 From c9817ad5d82f04fbc66278eda27bff094dcb3119 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:29 +0100 Subject: docs: filesystems: convert udf.txt to ReST - Add a SPDX header; - Add a document title; - Add table markups; - Add lists markups; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: Jan Kara Link: https://lore.kernel.org/r/2887f8a3a813a31170389eab687e9f199327dc7d.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/udf.rst | 75 +++++++++++++++++++++++++++++++++++++ Documentation/filesystems/udf.txt | 66 -------------------------------- 3 files changed, 76 insertions(+), 66 deletions(-) create mode 100644 Documentation/filesystems/udf.rst delete mode 100644 Documentation/filesystems/udf.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index 58d57c9bf922..ec03cb4d7353 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -92,5 +92,6 @@ Documentation for filesystem implementations. tmpfs ubifs ubifs-authentication.rst + udf virtiofs vfat diff --git a/Documentation/filesystems/udf.rst b/Documentation/filesystems/udf.rst new file mode 100644 index 000000000000..d9badbf285b2 --- /dev/null +++ b/Documentation/filesystems/udf.rst @@ -0,0 +1,75 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============== +UDF file system +=============== + +If you encounter problems with reading UDF discs using this driver, +please report them according to MAINTAINERS file. + +Write support requires a block driver which supports writing. Currently +dvd+rw drives and media support true random sector writes, and so a udf +filesystem on such devices can be directly mounted read/write. CD-RW +media however, does not support this. Instead the media can be formatted +for packet mode using the utility cdrwtool, then the pktcdvd driver can +be bound to the underlying cd device to provide the required buffering +and read-modify-write cycles to allow the filesystem random sector writes +while providing the hardware with only full packet writes. While not +required for dvd+rw media, use of the pktcdvd driver often enhances +performance due to very poor read-modify-write support supplied internally +by drive firmware. + +------------------------------------------------------------------------------- + +The following mount options are supported: + + =========== ====================================== + gid= Set the default group. + umask= Set the default umask. + mode= Set the default file permissions. + dmode= Set the default directory permissions. + uid= Set the default user. + bs= Set the block size. + unhide Show otherwise hidden files. + undelete Show deleted files in lists. + adinicb Embed data in the inode (default) + noadinicb Don't embed data in the inode + shortad Use short ad's + longad Use long ad's (default) + nostrict Unset strict conformance + iocharset= Set the NLS character set + =========== ====================================== + +The uid= and gid= options need a bit more explaining. They will accept a +decimal numeric value and all inodes on that mount will then appear as +belonging to that uid and gid. Mount options also accept the string "forget". +The forget option causes all IDs to be written to disk as -1 which is a way +of UDF standard to indicate that IDs are not supported for these files . + +For typical desktop use of removable media, you should set the ID to that of +the interactively logged on user, and also specify the forget option. This way +the interactive user will always see the files on the disk as belonging to him. + +The remaining are for debugging and disaster recovery: + + ===== ================================ + novrs Skip volume sequence recognition + ===== ================================ + +The following expect a offset from 0. + + ========== ================================================= + session= Set the CDROM session (default= last session) + anchor= Override standard anchor location. (default= 256) + lastblock= Set the last block of the filesystem/ + ========== ================================================= + +------------------------------------------------------------------------------- + + +For the latest version and toolset see: + https://github.com/pali/udftools + +Documentation on UDF and ECMA 167 is available FREE from: + - http://www.osta.org/ + - http://www.ecma-international.org/ diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.txt deleted file mode 100644 index e2f2faf32f18..000000000000 --- a/Documentation/filesystems/udf.txt +++ /dev/null @@ -1,66 +0,0 @@ -* -* Documentation/filesystems/udf.txt -* - -If you encounter problems with reading UDF discs using this driver, -please report them according to MAINTAINERS file. - -Write support requires a block driver which supports writing. Currently -dvd+rw drives and media support true random sector writes, and so a udf -filesystem on such devices can be directly mounted read/write. CD-RW -media however, does not support this. Instead the media can be formatted -for packet mode using the utility cdrwtool, then the pktcdvd driver can -be bound to the underlying cd device to provide the required buffering -and read-modify-write cycles to allow the filesystem random sector writes -while providing the hardware with only full packet writes. While not -required for dvd+rw media, use of the pktcdvd driver often enhances -performance due to very poor read-modify-write support supplied internally -by drive firmware. - -------------------------------------------------------------------------------- -The following mount options are supported: - - gid= Set the default group. - umask= Set the default umask. - mode= Set the default file permissions. - dmode= Set the default directory permissions. - uid= Set the default user. - bs= Set the block size. - unhide Show otherwise hidden files. - undelete Show deleted files in lists. - adinicb Embed data in the inode (default) - noadinicb Don't embed data in the inode - shortad Use short ad's - longad Use long ad's (default) - nostrict Unset strict conformance - iocharset= Set the NLS character set - -The uid= and gid= options need a bit more explaining. They will accept a -decimal numeric value and all inodes on that mount will then appear as -belonging to that uid and gid. Mount options also accept the string "forget". -The forget option causes all IDs to be written to disk as -1 which is a way -of UDF standard to indicate that IDs are not supported for these files . - -For typical desktop use of removable media, you should set the ID to that of -the interactively logged on user, and also specify the forget option. This way -the interactive user will always see the files on the disk as belonging to him. - -The remaining are for debugging and disaster recovery: - - novrs Skip volume sequence recognition - -The following expect a offset from 0. - - session= Set the CDROM session (default= last session) - anchor= Override standard anchor location. (default= 256) - lastblock= Set the last block of the filesystem/ - -------------------------------------------------------------------------------- - - -For the latest version and toolset see: - https://github.com/pali/udftools - -Documentation on UDF and ECMA 167 is available FREE from: - http://www.osta.org/ - http://www.ecma-international.org/ -- cgit v1.2.3 From 9a6108124c1d27192fee6f058b5de84f51ab62a0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 17 Feb 2020 17:12:30 +0100 Subject: docs: filesystems: convert zonefs.txt to ReST - Add a SPDX header; - Add a document title; - Some whitespace fixes and new line breaks; - Mark literal blocks as such; - Add it to filesystems/index.rst. Signed-off-by: Mauro Carvalho Chehab Acked-by: Damien Le Moal Link: https://lore.kernel.org/r/42a7cfcd19f6b904a9a3188fd4af71bed5050052.1581955849.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/filesystems/index.rst | 1 + Documentation/filesystems/zonefs.rst | 412 +++++++++++++++++++++++++++++++++++ Documentation/filesystems/zonefs.txt | 404 ---------------------------------- 3 files changed, 413 insertions(+), 404 deletions(-) create mode 100644 Documentation/filesystems/zonefs.rst delete mode 100644 Documentation/filesystems/zonefs.txt (limited to 'Documentation') diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index ec03cb4d7353..53f46a88e6ec 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -95,3 +95,4 @@ Documentation for filesystem implementations. udf virtiofs vfat + zonefs diff --git a/Documentation/filesystems/zonefs.rst b/Documentation/filesystems/zonefs.rst new file mode 100644 index 000000000000..7e733e751e98 --- /dev/null +++ b/Documentation/filesystems/zonefs.rst @@ -0,0 +1,412 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================================ +ZoneFS - Zone filesystem for Zoned block devices +================================================ + +Introduction +============ + +zonefs is a very simple file system exposing each zone of a zoned block device +as a file. Unlike a regular POSIX-compliant file system with native zoned block +device support (e.g. f2fs), zonefs does not hide the sequential write +constraint of zoned block devices to the user. Files representing sequential +write zones of the device must be written sequentially starting from the end +of the file (append only writes). + +As such, zonefs is in essence closer to a raw block device access interface +than to a full-featured POSIX file system. The goal of zonefs is to simplify +the implementation of zoned block device support in applications by replacing +raw block device file accesses with a richer file API, avoiding relying on +direct block device file ioctls which may be more obscure to developers. One +example of this approach is the implementation of LSM (log-structured merge) +tree structures (such as used in RocksDB and LevelDB) on zoned block devices +by allowing SSTables to be stored in a zone file similarly to a regular file +system rather than as a range of sectors of the entire disk. The introduction +of the higher level construct "one file is one zone" can help reducing the +amount of changes needed in the application as well as introducing support for +different application programming languages. + +Zoned block devices +------------------- + +Zoned storage devices belong to a class of storage devices with an address +space that is divided into zones. A zone is a group of consecutive LBAs and all +zones are contiguous (there are no LBA gaps). Zones may have different types. + +* Conventional zones: there are no access constraints to LBAs belonging to + conventional zones. Any read or write access can be executed, similarly to a + regular block device. +* Sequential zones: these zones accept random reads but must be written + sequentially. Each sequential zone has a write pointer maintained by the + device that keeps track of the mandatory start LBA position of the next write + to the device. As a result of this write constraint, LBAs in a sequential zone + cannot be overwritten. Sequential zones must first be erased using a special + command (zone reset) before rewriting. + +Zoned storage devices can be implemented using various recording and media +technologies. The most common form of zoned storage today uses the SCSI Zoned +Block Commands (ZBC) and Zoned ATA Commands (ZAC) interfaces on Shingled +Magnetic Recording (SMR) HDDs. + +Solid State Disks (SSD) storage devices can also implement a zoned interface +to, for instance, reduce internal write amplification due to garbage collection. +The NVMe Zoned NameSpace (ZNS) is a technical proposal of the NVMe standard +committee aiming at adding a zoned storage interface to the NVMe protocol. + +Zonefs Overview +=============== + +Zonefs exposes the zones of a zoned block device as files. The files +representing zones are grouped by zone type, which are themselves represented +by sub-directories. This file structure is built entirely using zone information +provided by the device and so does not require any complex on-disk metadata +structure. + +On-disk metadata +---------------- + +zonefs on-disk metadata is reduced to an immutable super block which +persistently stores a magic number and optional feature flags and values. On +mount, zonefs uses blkdev_report_zones() to obtain the device zone configuration +and populates the mount point with a static file tree solely based on this +information. File sizes come from the device zone type and write pointer +position managed by the device itself. + +The super block is always written on disk at sector 0. The first zone of the +device storing the super block is never exposed as a zone file by zonefs. If +the zone containing the super block is a sequential zone, the mkzonefs format +tool always "finishes" the zone, that is, it transitions the zone to a full +state to make it read-only, preventing any data write. + +Zone type sub-directories +------------------------- + +Files representing zones of the same type are grouped together under the same +sub-directory automatically created on mount. + +For conventional zones, the sub-directory "cnv" is used. This directory is +however created if and only if the device has usable conventional zones. If +the device only has a single conventional zone at sector 0, the zone will not +be exposed as a file as it will be used to store the zonefs super block. For +such devices, the "cnv" sub-directory will not be created. + +For sequential write zones, the sub-directory "seq" is used. + +These two directories are the only directories that exist in zonefs. Users +cannot create other directories and cannot rename nor delete the "cnv" and +"seq" sub-directories. + +The size of the directories indicated by the st_size field of struct stat, +obtained with the stat() or fstat() system calls, indicates the number of files +existing under the directory. + +Zone files +---------- + +Zone files are named using the number of the zone they represent within the set +of zones of a particular type. That is, both the "cnv" and "seq" directories +contain files named "0", "1", "2", ... The file numbers also represent +increasing zone start sector on the device. + +All read and write operations to zone files are not allowed beyond the file +maximum size, that is, beyond the zone size. Any access exceeding the zone +size is failed with the -EFBIG error. + +Creating, deleting, renaming or modifying any attribute of files and +sub-directories is not allowed. + +The number of blocks of a file as reported by stat() and fstat() indicates the +size of the file zone, or in other words, the maximum file size. + +Conventional zone files +----------------------- + +The size of conventional zone files is fixed to the size of the zone they +represent. Conventional zone files cannot be truncated. + +These files can be randomly read and written using any type of I/O operation: +buffered I/Os, direct I/Os, memory mapped I/Os (mmap), etc. There are no I/O +constraint for these files beyond the file size limit mentioned above. + +Sequential zone files +--------------------- + +The size of sequential zone files grouped in the "seq" sub-directory represents +the file's zone write pointer position relative to the zone start sector. + +Sequential zone files can only be written sequentially, starting from the file +end, that is, write operations can only be append writes. Zonefs makes no +attempt at accepting random writes and will fail any write request that has a +start offset not corresponding to the end of the file, or to the end of the last +write issued and still in-flight (for asynchrnous I/O operations). + +Since dirty page writeback by the page cache does not guarantee a sequential +write pattern, zonefs prevents buffered writes and writeable shared mappings +on sequential files. Only direct I/O writes are accepted for these files. +zonefs relies on the sequential delivery of write I/O requests to the device +implemented by the block layer elevator. An elevator implementing the sequential +write feature for zoned block device (ELEVATOR_F_ZBD_SEQ_WRITE elevator feature) +must be used. This type of elevator (e.g. mq-deadline) is the set by default +for zoned block devices on device initialization. + +There are no restrictions on the type of I/O used for read operations in +sequential zone files. Buffered I/Os, direct I/Os and shared read mappings are +all accepted. + +Truncating sequential zone files is allowed only down to 0, in which case, the +zone is reset to rewind the file zone write pointer position to the start of +the zone, or up to the zone size, in which case the file's zone is transitioned +to the FULL state (finish zone operation). + +Format options +-------------- + +Several optional features of zonefs can be enabled at format time. + +* Conventional zone aggregation: ranges of contiguous conventional zones can be + aggregated into a single larger file instead of the default one file per zone. +* File ownership: The owner UID and GID of zone files is by default 0 (root) + but can be changed to any valid UID/GID. +* File access permissions: the default 640 access permissions can be changed. + +IO error handling +----------------- + +Zoned block devices may fail I/O requests for reasons similar to regular block +devices, e.g. due to bad sectors. However, in addition to such known I/O +failure pattern, the standards governing zoned block devices behavior define +additional conditions that result in I/O errors. + +* A zone may transition to the read-only condition (BLK_ZONE_COND_READONLY): + While the data already written in the zone is still readable, the zone can + no longer be written. No user action on the zone (zone management command or + read/write access) can change the zone condition back to a normal read/write + state. While the reasons for the device to transition a zone to read-only + state are not defined by the standards, a typical cause for such transition + would be a defective write head on an HDD (all zones under this head are + changed to read-only). + +* A zone may transition to the offline condition (BLK_ZONE_COND_OFFLINE): + An offline zone cannot be read nor written. No user action can transition an + offline zone back to an operational good state. Similarly to zone read-only + transitions, the reasons for a drive to transition a zone to the offline + condition are undefined. A typical cause would be a defective read-write head + on an HDD causing all zones on the platter under the broken head to be + inaccessible. + +* Unaligned write errors: These errors result from the host issuing write + requests with a start sector that does not correspond to a zone write pointer + position when the write request is executed by the device. Even though zonefs + enforces sequential file write for sequential zones, unaligned write errors + may still happen in the case of a partial failure of a very large direct I/O + operation split into multiple BIOs/requests or asynchronous I/O operations. + If one of the write request within the set of sequential write requests + issued to the device fails, all write requests after queued after it will + become unaligned and fail. + +* Delayed write errors: similarly to regular block devices, if the device side + write cache is enabled, write errors may occur in ranges of previously + completed writes when the device write cache is flushed, e.g. on fsync(). + Similarly to the previous immediate unaligned write error case, delayed write + errors can propagate through a stream of cached sequential data for a zone + causing all data to be dropped after the sector that caused the error. + +All I/O errors detected by zonefs are notified to the user with an error code +return for the system call that trigered or detected the error. The recovery +actions taken by zonefs in response to I/O errors depend on the I/O type (read +vs write) and on the reason for the error (bad sector, unaligned writes or zone +condition change). + +* For read I/O errors, zonefs does not execute any particular recovery action, + but only if the file zone is still in a good condition and there is no + inconsistency between the file inode size and its zone write pointer position. + If a problem is detected, I/O error recovery is executed (see below table). + +* For write I/O errors, zonefs I/O error recovery is always executed. + +* A zone condition change to read-only or offline also always triggers zonefs + I/O error recovery. + +Zonefs minimal I/O error recovery may change a file size and a file access +permissions. + +* File size changes: + Immediate or delayed write errors in a sequential zone file may cause the file + inode size to be inconsistent with the amount of data successfully written in + the file zone. For instance, the partial failure of a multi-BIO large write + operation will cause the zone write pointer to advance partially, even though + the entire write operation will be reported as failed to the user. In such + case, the file inode size must be advanced to reflect the zone write pointer + change and eventually allow the user to restart writing at the end of the + file. + A file size may also be reduced to reflect a delayed write error detected on + fsync(): in this case, the amount of data effectively written in the zone may + be less than originally indicated by the file inode size. After such I/O + error, zonefs always fixes a file inode size to reflect the amount of data + persistently stored in the file zone. + +* Access permission changes: + A zone condition change to read-only is indicated with a change in the file + access permissions to render the file read-only. This disables changes to the + file attributes and data modification. For offline zones, all permissions + (read and write) to the file are disabled. + +Further action taken by zonefs I/O error recovery can be controlled by the user +with the "errors=xxx" mount option. The table below summarizes the result of +zonefs I/O error processing depending on the mount option and on the zone +conditions:: + + +--------------+-----------+-----------------------------------------+ + | | | Post error state | + | "errors=xxx" | device | access permissions | + | mount | zone | file file device zone | + | option | condition | size read write read write | + +--------------+-----------+-----------------------------------------+ + | | good | fixed yes no yes yes | + | remount-ro | read-only | fixed yes no yes no | + | (default) | offline | 0 no no no no | + +--------------+-----------+-----------------------------------------+ + | | good | fixed yes no yes yes | + | zone-ro | read-only | fixed yes no yes no | + | | offline | 0 no no no no | + +--------------+-----------+-----------------------------------------+ + | | good | 0 no no yes yes | + | zone-offline | read-only | 0 no no yes no | + | | offline | 0 no no no no | + +--------------+-----------+-----------------------------------------+ + | | good | fixed yes yes yes yes | + | repair | read-only | fixed yes no yes no | + | | offline | 0 no no no no | + +--------------+-----------+-----------------------------------------+ + +Further notes: + +* The "errors=remount-ro" mount option is the default behavior of zonefs I/O + error processing if no errors mount option is specified. +* With the "errors=remount-ro" mount option, the change of the file access + permissions to read-only applies to all files. The file system is remounted + read-only. +* Access permission and file size changes due to the device transitioning zones + to the offline condition are permanent. Remounting or reformating the device + with mkfs.zonefs (mkzonefs) will not change back offline zone files to a good + state. +* File access permission changes to read-only due to the device transitioning + zones to the read-only condition are permanent. Remounting or reformating + the device will not re-enable file write access. +* File access permission changes implied by the remount-ro, zone-ro and + zone-offline mount options are temporary for zones in a good condition. + Unmounting and remounting the file system will restore the previous default + (format time values) access rights to the files affected. +* The repair mount option triggers only the minimal set of I/O error recovery + actions, that is, file size fixes for zones in a good condition. Zones + indicated as being read-only or offline by the device still imply changes to + the zone file access permissions as noted in the table above. + +Mount options +------------- + +zonefs define the "errors=" mount option to allow the user to specify +zonefs behavior in response to I/O errors, inode size inconsistencies or zone +condition chages. The defined behaviors are as follow: + +* remount-ro (default) +* zone-ro +* zone-offline +* repair + +The I/O error actions defined for each behavior is detailed in the previous +section. + +Zonefs User Space Tools +======================= + +The mkzonefs tool is used to format zoned block devices for use with zonefs. +This tool is available on Github at: + +https://github.com/damien-lemoal/zonefs-tools + +zonefs-tools also includes a test suite which can be run against any zoned +block device, including null_blk block device created with zoned mode. + +Examples +-------- + +The following formats a 15TB host-managed SMR HDD with 256 MB zones +with the conventional zones aggregation feature enabled:: + + # mkzonefs -o aggr_cnv /dev/sdX + # mount -t zonefs /dev/sdX /mnt + # ls -l /mnt/ + total 0 + dr-xr-xr-x 2 root root 1 Nov 25 13:23 cnv + dr-xr-xr-x 2 root root 55356 Nov 25 13:23 seq + +The size of the zone files sub-directories indicate the number of files +existing for each type of zones. In this example, there is only one +conventional zone file (all conventional zones are aggregated under a single +file):: + + # ls -l /mnt/cnv + total 137101312 + -rw-r----- 1 root root 140391743488 Nov 25 13:23 0 + +This aggregated conventional zone file can be used as a regular file:: + + # mkfs.ext4 /mnt/cnv/0 + # mount -o loop /mnt/cnv/0 /data + +The "seq" sub-directory grouping files for sequential write zones has in this +example 55356 zones:: + + # ls -lv /mnt/seq + total 14511243264 + -rw-r----- 1 root root 0 Nov 25 13:23 0 + -rw-r----- 1 root root 0 Nov 25 13:23 1 + -rw-r----- 1 root root 0 Nov 25 13:23 2 + ... + -rw-r----- 1 root root 0 Nov 25 13:23 55354 + -rw-r----- 1 root root 0 Nov 25 13:23 55355 + +For sequential write zone files, the file size changes as data is appended at +the end of the file, similarly to any regular file system:: + + # dd if=/dev/zero of=/mnt/seq/0 bs=4096 count=1 conv=notrunc oflag=direct + 1+0 records in + 1+0 records out + 4096 bytes (4.1 kB, 4.0 KiB) copied, 0.00044121 s, 9.3 MB/s + + # ls -l /mnt/seq/0 + -rw-r----- 1 root root 4096 Nov 25 13:23 /mnt/seq/0 + +The written file can be truncated to the zone size, preventing any further +write operation:: + + # truncate -s 268435456 /mnt/seq/0 + # ls -l /mnt/seq/0 + -rw-r----- 1 root root 268435456 Nov 25 13:49 /mnt/seq/0 + +Truncation to 0 size allows freeing the file zone storage space and restart +append-writes to the file:: + + # truncate -s 0 /mnt/seq/0 + # ls -l /mnt/seq/0 + -rw-r----- 1 root root 0 Nov 25 13:49 /mnt/seq/0 + +Since files are statically mapped to zones on the disk, the number of blocks of +a file as reported by stat() and fstat() indicates the size of the file zone:: + + # stat /mnt/seq/0 + File: /mnt/seq/0 + Size: 0 Blocks: 524288 IO Block: 4096 regular empty file + Device: 870h/2160d Inode: 50431 Links: 1 + Access: (0640/-rw-r-----) Uid: ( 0/ root) Gid: ( 0/ root) + Access: 2019-11-25 13:23:57.048971997 +0900 + Modify: 2019-11-25 13:52:25.553805765 +0900 + Change: 2019-11-25 13:52:25.553805765 +0900 + Birth: - + +The number of blocks of the file ("Blocks") in units of 512B blocks gives the +maximum file size of 524288 * 512 B = 256 MB, corresponding to the device zone +size in this example. Of note is that the "IO block" field always indicates the +minimum I/O size for writes and corresponds to the device physical sector size. diff --git a/Documentation/filesystems/zonefs.txt b/Documentation/filesystems/zonefs.txt deleted file mode 100644 index 935bf22031ca..000000000000 --- a/Documentation/filesystems/zonefs.txt +++ /dev/null @@ -1,404 +0,0 @@ -ZoneFS - Zone filesystem for Zoned block devices - -Introduction -============ - -zonefs is a very simple file system exposing each zone of a zoned block device -as a file. Unlike a regular POSIX-compliant file system with native zoned block -device support (e.g. f2fs), zonefs does not hide the sequential write -constraint of zoned block devices to the user. Files representing sequential -write zones of the device must be written sequentially starting from the end -of the file (append only writes). - -As such, zonefs is in essence closer to a raw block device access interface -than to a full-featured POSIX file system. The goal of zonefs is to simplify -the implementation of zoned block device support in applications by replacing -raw block device file accesses with a richer file API, avoiding relying on -direct block device file ioctls which may be more obscure to developers. One -example of this approach is the implementation of LSM (log-structured merge) -tree structures (such as used in RocksDB and LevelDB) on zoned block devices -by allowing SSTables to be stored in a zone file similarly to a regular file -system rather than as a range of sectors of the entire disk. The introduction -of the higher level construct "one file is one zone" can help reducing the -amount of changes needed in the application as well as introducing support for -different application programming languages. - -Zoned block devices -------------------- - -Zoned storage devices belong to a class of storage devices with an address -space that is divided into zones. A zone is a group of consecutive LBAs and all -zones are contiguous (there are no LBA gaps). Zones may have different types. -* Conventional zones: there are no access constraints to LBAs belonging to - conventional zones. Any read or write access can be executed, similarly to a - regular block device. -* Sequential zones: these zones accept random reads but must be written - sequentially. Each sequential zone has a write pointer maintained by the - device that keeps track of the mandatory start LBA position of the next write - to the device. As a result of this write constraint, LBAs in a sequential zone - cannot be overwritten. Sequential zones must first be erased using a special - command (zone reset) before rewriting. - -Zoned storage devices can be implemented using various recording and media -technologies. The most common form of zoned storage today uses the SCSI Zoned -Block Commands (ZBC) and Zoned ATA Commands (ZAC) interfaces on Shingled -Magnetic Recording (SMR) HDDs. - -Solid State Disks (SSD) storage devices can also implement a zoned interface -to, for instance, reduce internal write amplification due to garbage collection. -The NVMe Zoned NameSpace (ZNS) is a technical proposal of the NVMe standard -committee aiming at adding a zoned storage interface to the NVMe protocol. - -Zonefs Overview -=============== - -Zonefs exposes the zones of a zoned block device as files. The files -representing zones are grouped by zone type, which are themselves represented -by sub-directories. This file structure is built entirely using zone information -provided by the device and so does not require any complex on-disk metadata -structure. - -On-disk metadata ----------------- - -zonefs on-disk metadata is reduced to an immutable super block which -persistently stores a magic number and optional feature flags and values. On -mount, zonefs uses blkdev_report_zones() to obtain the device zone configuration -and populates the mount point with a static file tree solely based on this -information. File sizes come from the device zone type and write pointer -position managed by the device itself. - -The super block is always written on disk at sector 0. The first zone of the -device storing the super block is never exposed as a zone file by zonefs. If -the zone containing the super block is a sequential zone, the mkzonefs format -tool always "finishes" the zone, that is, it transitions the zone to a full -state to make it read-only, preventing any data write. - -Zone type sub-directories -------------------------- - -Files representing zones of the same type are grouped together under the same -sub-directory automatically created on mount. - -For conventional zones, the sub-directory "cnv" is used. This directory is -however created if and only if the device has usable conventional zones. If -the device only has a single conventional zone at sector 0, the zone will not -be exposed as a file as it will be used to store the zonefs super block. For -such devices, the "cnv" sub-directory will not be created. - -For sequential write zones, the sub-directory "seq" is used. - -These two directories are the only directories that exist in zonefs. Users -cannot create other directories and cannot rename nor delete the "cnv" and -"seq" sub-directories. - -The size of the directories indicated by the st_size field of struct stat, -obtained with the stat() or fstat() system calls, indicates the number of files -existing under the directory. - -Zone files ----------- - -Zone files are named using the number of the zone they represent within the set -of zones of a particular type. That is, both the "cnv" and "seq" directories -contain files named "0", "1", "2", ... The file numbers also represent -increasing zone start sector on the device. - -All read and write operations to zone files are not allowed beyond the file -maximum size, that is, beyond the zone size. Any access exceeding the zone -size is failed with the -EFBIG error. - -Creating, deleting, renaming or modifying any attribute of files and -sub-directories is not allowed. - -The number of blocks of a file as reported by stat() and fstat() indicates the -size of the file zone, or in other words, the maximum file size. - -Conventional zone files ------------------------ - -The size of conventional zone files is fixed to the size of the zone they -represent. Conventional zone files cannot be truncated. - -These files can be randomly read and written using any type of I/O operation: -buffered I/Os, direct I/Os, memory mapped I/Os (mmap), etc. There are no I/O -constraint for these files beyond the file size limit mentioned above. - -Sequential zone files ---------------------- - -The size of sequential zone files grouped in the "seq" sub-directory represents -the file's zone write pointer position relative to the zone start sector. - -Sequential zone files can only be written sequentially, starting from the file -end, that is, write operations can only be append writes. Zonefs makes no -attempt at accepting random writes and will fail any write request that has a -start offset not corresponding to the end of the file, or to the end of the last -write issued and still in-flight (for asynchrnous I/O operations). - -Since dirty page writeback by the page cache does not guarantee a sequential -write pattern, zonefs prevents buffered writes and writeable shared mappings -on sequential files. Only direct I/O writes are accepted for these files. -zonefs relies on the sequential delivery of write I/O requests to the device -implemented by the block layer elevator. An elevator implementing the sequential -write feature for zoned block device (ELEVATOR_F_ZBD_SEQ_WRITE elevator feature) -must be used. This type of elevator (e.g. mq-deadline) is the set by default -for zoned block devices on device initialization. - -There are no restrictions on the type of I/O used for read operations in -sequential zone files. Buffered I/Os, direct I/Os and shared read mappings are -all accepted. - -Truncating sequential zone files is allowed only down to 0, in which case, the -zone is reset to rewind the file zone write pointer position to the start of -the zone, or up to the zone size, in which case the file's zone is transitioned -to the FULL state (finish zone operation). - -Format options --------------- - -Several optional features of zonefs can be enabled at format time. -* Conventional zone aggregation: ranges of contiguous conventional zones can be - aggregated into a single larger file instead of the default one file per zone. -* File ownership: The owner UID and GID of zone files is by default 0 (root) - but can be changed to any valid UID/GID. -* File access permissions: the default 640 access permissions can be changed. - -IO error handling ------------------ - -Zoned block devices may fail I/O requests for reasons similar to regular block -devices, e.g. due to bad sectors. However, in addition to such known I/O -failure pattern, the standards governing zoned block devices behavior define -additional conditions that result in I/O errors. - -* A zone may transition to the read-only condition (BLK_ZONE_COND_READONLY): - While the data already written in the zone is still readable, the zone can - no longer be written. No user action on the zone (zone management command or - read/write access) can change the zone condition back to a normal read/write - state. While the reasons for the device to transition a zone to read-only - state are not defined by the standards, a typical cause for such transition - would be a defective write head on an HDD (all zones under this head are - changed to read-only). - -* A zone may transition to the offline condition (BLK_ZONE_COND_OFFLINE): - An offline zone cannot be read nor written. No user action can transition an - offline zone back to an operational good state. Similarly to zone read-only - transitions, the reasons for a drive to transition a zone to the offline - condition are undefined. A typical cause would be a defective read-write head - on an HDD causing all zones on the platter under the broken head to be - inaccessible. - -* Unaligned write errors: These errors result from the host issuing write - requests with a start sector that does not correspond to a zone write pointer - position when the write request is executed by the device. Even though zonefs - enforces sequential file write for sequential zones, unaligned write errors - may still happen in the case of a partial failure of a very large direct I/O - operation split into multiple BIOs/requests or asynchronous I/O operations. - If one of the write request within the set of sequential write requests - issued to the device fails, all write requests after queued after it will - become unaligned and fail. - -* Delayed write errors: similarly to regular block devices, if the device side - write cache is enabled, write errors may occur in ranges of previously - completed writes when the device write cache is flushed, e.g. on fsync(). - Similarly to the previous immediate unaligned write error case, delayed write - errors can propagate through a stream of cached sequential data for a zone - causing all data to be dropped after the sector that caused the error. - -All I/O errors detected by zonefs are notified to the user with an error code -return for the system call that trigered or detected the error. The recovery -actions taken by zonefs in response to I/O errors depend on the I/O type (read -vs write) and on the reason for the error (bad sector, unaligned writes or zone -condition change). - -* For read I/O errors, zonefs does not execute any particular recovery action, - but only if the file zone is still in a good condition and there is no - inconsistency between the file inode size and its zone write pointer position. - If a problem is detected, I/O error recovery is executed (see below table). - -* For write I/O errors, zonefs I/O error recovery is always executed. - -* A zone condition change to read-only or offline also always triggers zonefs - I/O error recovery. - -Zonefs minimal I/O error recovery may change a file size and a file access -permissions. - -* File size changes: - Immediate or delayed write errors in a sequential zone file may cause the file - inode size to be inconsistent with the amount of data successfully written in - the file zone. For instance, the partial failure of a multi-BIO large write - operation will cause the zone write pointer to advance partially, even though - the entire write operation will be reported as failed to the user. In such - case, the file inode size must be advanced to reflect the zone write pointer - change and eventually allow the user to restart writing at the end of the - file. - A file size may also be reduced to reflect a delayed write error detected on - fsync(): in this case, the amount of data effectively written in the zone may - be less than originally indicated by the file inode size. After such I/O - error, zonefs always fixes a file inode size to reflect the amount of data - persistently stored in the file zone. - -* Access permission changes: - A zone condition change to read-only is indicated with a change in the file - access permissions to render the file read-only. This disables changes to the - file attributes and data modification. For offline zones, all permissions - (read and write) to the file are disabled. - -Further action taken by zonefs I/O error recovery can be controlled by the user -with the "errors=xxx" mount option. The table below summarizes the result of -zonefs I/O error processing depending on the mount option and on the zone -conditions. - - +--------------+-----------+-----------------------------------------+ - | | | Post error state | - | "errors=xxx" | device | access permissions | - | mount | zone | file file device zone | - | option | condition | size read write read write | - +--------------+-----------+-----------------------------------------+ - | | good | fixed yes no yes yes | - | remount-ro | read-only | fixed yes no yes no | - | (default) | offline | 0 no no no no | - +--------------+-----------+-----------------------------------------+ - | | good | fixed yes no yes yes | - | zone-ro | read-only | fixed yes no yes no | - | | offline | 0 no no no no | - +--------------+-----------+-----------------------------------------+ - | | good | 0 no no yes yes | - | zone-offline | read-only | 0 no no yes no | - | | offline | 0 no no no no | - +--------------+-----------+-----------------------------------------+ - | | good | fixed yes yes yes yes | - | repair | read-only | fixed yes no yes no | - | | offline | 0 no no no no | - +--------------+-----------+-----------------------------------------+ - -Further notes: -* The "errors=remount-ro" mount option is the default behavior of zonefs I/O - error processing if no errors mount option is specified. -* With the "errors=remount-ro" mount option, the change of the file access - permissions to read-only applies to all files. The file system is remounted - read-only. -* Access permission and file size changes due to the device transitioning zones - to the offline condition are permanent. Remounting or reformating the device - with mkfs.zonefs (mkzonefs) will not change back offline zone files to a good - state. -* File access permission changes to read-only due to the device transitioning - zones to the read-only condition are permanent. Remounting or reformating - the device will not re-enable file write access. -* File access permission changes implied by the remount-ro, zone-ro and - zone-offline mount options are temporary for zones in a good condition. - Unmounting and remounting the file system will restore the previous default - (format time values) access rights to the files affected. -* The repair mount option triggers only the minimal set of I/O error recovery - actions, that is, file size fixes for zones in a good condition. Zones - indicated as being read-only or offline by the device still imply changes to - the zone file access permissions as noted in the table above. - -Mount options -------------- - -zonefs define the "errors=" mount option to allow the user to specify -zonefs behavior in response to I/O errors, inode size inconsistencies or zone -condition chages. The defined behaviors are as follow: -* remount-ro (default) -* zone-ro -* zone-offline -* repair - -The I/O error actions defined for each behavior is detailed in the previous -section. - -Zonefs User Space Tools -======================= - -The mkzonefs tool is used to format zoned block devices for use with zonefs. -This tool is available on Github at: - -https://github.com/damien-lemoal/zonefs-tools - -zonefs-tools also includes a test suite which can be run against any zoned -block device, including null_blk block device created with zoned mode. - -Examples --------- - -The following formats a 15TB host-managed SMR HDD with 256 MB zones -with the conventional zones aggregation feature enabled. - -# mkzonefs -o aggr_cnv /dev/sdX -# mount -t zonefs /dev/sdX /mnt -# ls -l /mnt/ -total 0 -dr-xr-xr-x 2 root root 1 Nov 25 13:23 cnv -dr-xr-xr-x 2 root root 55356 Nov 25 13:23 seq - -The size of the zone files sub-directories indicate the number of files -existing for each type of zones. In this example, there is only one -conventional zone file (all conventional zones are aggregated under a single -file). - -# ls -l /mnt/cnv -total 137101312 --rw-r----- 1 root root 140391743488 Nov 25 13:23 0 - -This aggregated conventional zone file can be used as a regular file. - -# mkfs.ext4 /mnt/cnv/0 -# mount -o loop /mnt/cnv/0 /data - -The "seq" sub-directory grouping files for sequential write zones has in this -example 55356 zones. - -# ls -lv /mnt/seq -total 14511243264 --rw-r----- 1 root root 0 Nov 25 13:23 0 --rw-r----- 1 root root 0 Nov 25 13:23 1 --rw-r----- 1 root root 0 Nov 25 13:23 2 -... --rw-r----- 1 root root 0 Nov 25 13:23 55354 --rw-r----- 1 root root 0 Nov 25 13:23 55355 - -For sequential write zone files, the file size changes as data is appended at -the end of the file, similarly to any regular file system. - -# dd if=/dev/zero of=/mnt/seq/0 bs=4096 count=1 conv=notrunc oflag=direct -1+0 records in -1+0 records out -4096 bytes (4.1 kB, 4.0 KiB) copied, 0.00044121 s, 9.3 MB/s - -# ls -l /mnt/seq/0 --rw-r----- 1 root root 4096 Nov 25 13:23 /mnt/seq/0 - -The written file can be truncated to the zone size, preventing any further -write operation. - -# truncate -s 268435456 /mnt/seq/0 -# ls -l /mnt/seq/0 --rw-r----- 1 root root 268435456 Nov 25 13:49 /mnt/seq/0 - -Truncation to 0 size allows freeing the file zone storage space and restart -append-writes to the file. - -# truncate -s 0 /mnt/seq/0 -# ls -l /mnt/seq/0 --rw-r----- 1 root root 0 Nov 25 13:49 /mnt/seq/0 - -Since files are statically mapped to zones on the disk, the number of blocks of -a file as reported by stat() and fstat() indicates the size of the file zone. - -# stat /mnt/seq/0 - File: /mnt/seq/0 - Size: 0 Blocks: 524288 IO Block: 4096 regular empty file -Device: 870h/2160d Inode: 50431 Links: 1 -Access: (0640/-rw-r-----) Uid: ( 0/ root) Gid: ( 0/ root) -Access: 2019-11-25 13:23:57.048971997 +0900 -Modify: 2019-11-25 13:52:25.553805765 +0900 -Change: 2019-11-25 13:52:25.553805765 +0900 - Birth: - - -The number of blocks of the file ("Blocks") in units of 512B blocks gives the -maximum file size of 524288 * 512 B = 256 MB, corresponding to the device zone -size in this example. Of note is that the "IO block" field always indicates the -minimum I/O size for writes and corresponds to the device physical sector size. -- cgit v1.2.3 From 76897807dc79747161429c984528cf8c6670b328 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 25 Feb 2020 10:03:28 +0900 Subject: dt-bindings: clock: Convert UniPhier clock to json-schema Convert the UniPhier clock controller binding to DT schema format. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../bindings/clock/socionext,uniphier-clock.yaml | 94 +++++++++++++++ .../devicetree/bindings/clock/uniphier-clock.txt | 132 --------------------- 2 files changed, 94 insertions(+), 132 deletions(-) create mode 100644 Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml delete mode 100644 Documentation/devicetree/bindings/clock/uniphier-clock.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml b/Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml new file mode 100644 index 000000000000..c3930edc410f --- /dev/null +++ b/Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/socionext,uniphier-clock.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: UniPhier clock controller + +maintainers: + - Masahiro Yamada + +properties: + compatible: + oneOf: + - description: System clock + enum: + - socionext,uniphier-ld4-clock + - socionext,uniphier-pro4-clock + - socionext,uniphier-sld8-clock + - socionext,uniphier-pro5-clock + - socionext,uniphier-pxs2-clock + - socionext,uniphier-ld6b-clock + - socionext,uniphier-ld11-clock + - socionext,uniphier-ld20-clock + - socionext,uniphier-pxs3-clock + - description: Media I/O (MIO) clock, SD clock + enum: + - socionext,uniphier-ld4-mio-clock + - socionext,uniphier-pro4-mio-clock + - socionext,uniphier-sld8-mio-clock + - socionext,uniphier-pro5-sd-clock + - socionext,uniphier-pxs2-sd-clock + - socionext,uniphier-ld11-mio-clock + - socionext,uniphier-ld20-sd-clock + - socionext,uniphier-pxs3-sd-clock + - description: Peripheral clock + enum: + - socionext,uniphier-ld4-peri-clock + - socionext,uniphier-pro4-peri-clock + - socionext,uniphier-sld8-peri-clock + - socionext,uniphier-pro5-peri-clock + - socionext,uniphier-pxs2-peri-clock + - socionext,uniphier-ld11-peri-clock + - socionext,uniphier-ld20-peri-clock + - socionext,uniphier-pxs3-peri-clock + + "#clock-cells": + const: 1 + +additionalProperties: false + +required: + - compatible + - "#clock-cells" + +examples: + - | + sysctrl@61840000 { + compatible = "socionext,uniphier-sysctrl", "simple-mfd", "syscon"; + reg = <0x61840000 0x4000>; + + clock { + compatible = "socionext,uniphier-ld11-clock"; + #clock-cells = <1>; + }; + + // other nodes ... + }; + + - | + mioctrl@59810000 { + compatible = "socionext,uniphier-mioctrl", "simple-mfd", "syscon"; + reg = <0x59810000 0x800>; + + clock { + compatible = "socionext,uniphier-ld11-mio-clock"; + #clock-cells = <1>; + }; + + // other nodes ... + }; + + - | + perictrl@59820000 { + compatible = "socionext,uniphier-perictrl", "simple-mfd", "syscon"; + reg = <0x59820000 0x200>; + + clock { + compatible = "socionext,uniphier-ld11-peri-clock"; + #clock-cells = <1>; + }; + + // other nodes ... + }; diff --git a/Documentation/devicetree/bindings/clock/uniphier-clock.txt b/Documentation/devicetree/bindings/clock/uniphier-clock.txt deleted file mode 100644 index 7b5f602765fe..000000000000 --- a/Documentation/devicetree/bindings/clock/uniphier-clock.txt +++ /dev/null @@ -1,132 +0,0 @@ -UniPhier clock controller - - -System clock ------------- - -Required properties: -- compatible: should be one of the following: - "socionext,uniphier-ld4-clock" - for LD4 SoC. - "socionext,uniphier-pro4-clock" - for Pro4 SoC. - "socionext,uniphier-sld8-clock" - for sLD8 SoC. - "socionext,uniphier-pro5-clock" - for Pro5 SoC. - "socionext,uniphier-pxs2-clock" - for PXs2/LD6b SoC. - "socionext,uniphier-ld11-clock" - for LD11 SoC. - "socionext,uniphier-ld20-clock" - for LD20 SoC. - "socionext,uniphier-pxs3-clock" - for PXs3 SoC -- #clock-cells: should be 1. - -Example: - - sysctrl@61840000 { - compatible = "socionext,uniphier-sysctrl", - "simple-mfd", "syscon"; - reg = <0x61840000 0x4000>; - - clock { - compatible = "socionext,uniphier-ld11-clock"; - #clock-cells = <1>; - }; - - other nodes ... - }; - -Provided clocks: - - 8: ST DMAC -12: GIO (Giga bit stream I/O) -14: USB3 ch0 host -15: USB3 ch1 host -16: USB3 ch0 PHY0 -17: USB3 ch0 PHY1 -20: USB3 ch1 PHY0 -21: USB3 ch1 PHY1 - - -Media I/O (MIO) clock, SD clock -------------------------------- - -Required properties: -- compatible: should be one of the following: - "socionext,uniphier-ld4-mio-clock" - for LD4 SoC. - "socionext,uniphier-pro4-mio-clock" - for Pro4 SoC. - "socionext,uniphier-sld8-mio-clock" - for sLD8 SoC. - "socionext,uniphier-pro5-sd-clock" - for Pro5 SoC. - "socionext,uniphier-pxs2-sd-clock" - for PXs2/LD6b SoC. - "socionext,uniphier-ld11-mio-clock" - for LD11 SoC. - "socionext,uniphier-ld20-sd-clock" - for LD20 SoC. - "socionext,uniphier-pxs3-sd-clock" - for PXs3 SoC -- #clock-cells: should be 1. - -Example: - - mioctrl@59810000 { - compatible = "socionext,uniphier-mioctrl", - "simple-mfd", "syscon"; - reg = <0x59810000 0x800>; - - clock { - compatible = "socionext,uniphier-ld11-mio-clock"; - #clock-cells = <1>; - }; - - other nodes ... - }; - -Provided clocks: - - 0: SD ch0 host - 1: eMMC host - 2: SD ch1 host - 7: MIO DMAC - 8: USB2 ch0 host - 9: USB2 ch1 host -10: USB2 ch2 host -12: USB2 ch0 PHY -13: USB2 ch1 PHY -14: USB2 ch2 PHY - - -Peripheral clock ----------------- - -Required properties: -- compatible: should be one of the following: - "socionext,uniphier-ld4-peri-clock" - for LD4 SoC. - "socionext,uniphier-pro4-peri-clock" - for Pro4 SoC. - "socionext,uniphier-sld8-peri-clock" - for sLD8 SoC. - "socionext,uniphier-pro5-peri-clock" - for Pro5 SoC. - "socionext,uniphier-pxs2-peri-clock" - for PXs2/LD6b SoC. - "socionext,uniphier-ld11-peri-clock" - for LD11 SoC. - "socionext,uniphier-ld20-peri-clock" - for LD20 SoC. - "socionext,uniphier-pxs3-peri-clock" - for PXs3 SoC -- #clock-cells: should be 1. - -Example: - - perictrl@59820000 { - compatible = "socionext,uniphier-perictrl", - "simple-mfd", "syscon"; - reg = <0x59820000 0x200>; - - clock { - compatible = "socionext,uniphier-ld11-peri-clock"; - #clock-cells = <1>; - }; - - other nodes ... - }; - -Provided clocks: - - 0: UART ch0 - 1: UART ch1 - 2: UART ch2 - 3: UART ch3 - 4: I2C ch0 - 5: I2C ch1 - 6: I2C ch2 - 7: I2C ch3 - 8: I2C ch4 - 9: I2C ch5 -10: I2C ch6 -- cgit v1.2.3 From d85eed038ef4919933b7f4f9d3b4f49ede4092aa Mon Sep 17 00:00:00 2001 From: Jianxin Pan Date: Mon, 2 Mar 2020 23:54:08 +0800 Subject: dt-bindings: power: Fix dt_binding_check error Missing ';' in the end of secure-monitor example node. Fixes: 165b5fb294e8 ("dt-bindings: power: add Amlogic secure power domains bindings") Reported-by: Rob Herring Signed-off-by: Jianxin Pan Reviewed-by: Neil Armstrong Acked-by: Rob Herring Signed-off-by: Kevin Hilman Link: https://lore.kernel.org/r/1583164448-83438-1-git-send-email-jianxin.pan@amlogic.com --- Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml b/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml index af32209218bb..bc4e037f3f73 100644 --- a/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml +++ b/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml @@ -36,5 +36,5 @@ examples: compatible = "amlogic,meson-a1-pwrc"; #power-domain-cells = <1>; }; - } + }; -- cgit v1.2.3 From e66ae6cadc8eac2300f8805a08c98544f400021b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 2 Mar 2020 19:02:18 +0200 Subject: drm/i915: fix documentation build after rename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit intel_csr.c was moved under display. Fixes: 06d3ff6e7451 ("drm/i915: move intel_csr.[ch] under display/") Reported-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200302170218.16496-1-jani.nikula@intel.com --- Documentation/gpu/i915.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index e539c42a3e78..cc74e24ca3b5 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -207,10 +207,10 @@ DPIO CSR firmware support for DMC ---------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/intel_csr.c +.. kernel-doc:: drivers/gpu/drm/i915/display/intel_csr.c :doc: csr support for dmc -.. kernel-doc:: drivers/gpu/drm/i915/intel_csr.c +.. kernel-doc:: drivers/gpu/drm/i915/display/intel_csr.c :internal: Video BIOS Table (VBT) -- cgit v1.2.3 From 83dc7f699a9ccb74a86f628f4b5a02e23b3290df Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 2 Mar 2020 14:52:54 +0000 Subject: drm/i915: Fix doclinks Update locations for ./drivers/gpu/drm/i915/i915_vma.h:1: warning: 'Virtual Memory Address' not found ./drivers/gpu/drm/i915/i915_gem_gtt.c:1: warning: 'Global GTT views' not found Signed-off-by: Chris Wilson Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200302145254.520447-1-chris@chris-wilson.co.uk --- Documentation/gpu/i915.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index cc74e24ca3b5..f6d363b6756e 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -332,7 +332,7 @@ This process is dubbed relocation. GEM BO Management Implementation Details ---------------------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/i915_vma.h +.. kernel-doc:: drivers/gpu/drm/i915/i915_vma_types.h :doc: Virtual Memory Address Buffer Object Eviction @@ -382,7 +382,7 @@ Logical Rings, Logical Ring Contexts and Execlists Global GTT views ---------------- -.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_gtt.c +.. kernel-doc:: drivers/gpu/drm/i915/i915_vma_types.h :doc: Global GTT views .. kernel-doc:: drivers/gpu/drm/i915/i915_gem_gtt.c -- cgit v1.2.3 From a1af8d71f0e4c6c23eb809de559150bbff296f4d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 27 Feb 2020 02:53:25 +0900 Subject: kbuild: remove trailing slash from devicetree/binding/ for descending obj-* needs a trailing slash for a directory, but subdir-* does not because it already implies a directory. Also, change subdir-y to subdir- to ensure this is effective only for cleaning targets. This makes the cleaning log consistent. (no trailing slash) Before: $ make clean CLEAN Documentation/devicetree/bindings/ After: $ make clean CLEAN Documentation/devicetree/bindings Signed-off-by: Masahiro Yamada --- Documentation/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/Makefile b/Documentation/Makefile index d77bb607aea4..39569a2e1953 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -2,7 +2,8 @@ # Makefile for Sphinx documentation # -subdir-y := devicetree/bindings/ +# for cleaning +subdir- := devicetree/bindings # Check for broken documentation file references ifeq ($(CONFIG_WARN_MISSING_DOCUMENTS),y) -- cgit v1.2.3 From fcf1b6a35c16ac500fa908a4022238e5d666eabf Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 26 Feb 2020 15:23:36 -0800 Subject: Documentation/llvm: add documentation on building w/ Clang/LLVM Added to kbuild documentation. Provides more official info on building kernels with Clang and LLVM than our wiki. Suggested-by: Kees Cook Reviewed-by: Kees Cook Reviewed-by: Nathan Chancellor Reviewed-by: Sedat Dilek Signed-off-by: Nick Desaulniers Signed-off-by: Masahiro Yamada --- Documentation/kbuild/index.rst | 1 + Documentation/kbuild/llvm.rst | 80 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 Documentation/kbuild/llvm.rst (limited to 'Documentation') diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst index 0f144fad99a6..3882bd5f7728 100644 --- a/Documentation/kbuild/index.rst +++ b/Documentation/kbuild/index.rst @@ -19,6 +19,7 @@ Kernel Build System issues reproducible-builds + llvm .. only:: subproject and html diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst new file mode 100644 index 000000000000..d6c79eb4e23e --- /dev/null +++ b/Documentation/kbuild/llvm.rst @@ -0,0 +1,80 @@ +============================== +Building Linux with Clang/LLVM +============================== + +This document covers how to build the Linux kernel with Clang and LLVM +utilities. + +About +----- + +The Linux kernel has always traditionally been compiled with GNU toolchains +such as GCC and binutils. Ongoing work has allowed for `Clang +`_ and `LLVM `_ utilities to be +used as viable substitutes. Distributions such as `Android +`_, `ChromeOS +`_, and `OpenMandriva +`_ use Clang built kernels. `LLVM is a +collection of toolchain components implemented in terms of C++ objects +`_. Clang is a front-end to LLVM that +supports C and the GNU C extensions required by the kernel, and is pronounced +"klang," not "see-lang." + +Clang +----- + +The compiler used can be swapped out via `CC=` command line argument to `make`. +`CC=` should be set when selecting a config and during a build. + + make CC=clang defconfig + + make CC=clang + +Cross Compiling +--------------- + +A single Clang compiler binary will typically contain all supported backends, +which can help simplify cross compiling. + + ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang + +`CROSS_COMPILE` is not used to prefix the Clang compiler binary, instead +`CROSS_COMPILE` is used to set a command line flag: `--target `. For +example: + + clang --target aarch64-linux-gnu foo.c + +LLVM Utilities +-------------- + +LLVM has substitutes for GNU binutils utilities. These can be invoked as +additional parameters to `make`. + + make CC=clang AS=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \\ + OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump OBJSIZE=llvm-objsize \\ + READELF=llvm-readelf HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar \\ + HOSTLD=ld.lld + +Getting Help +------------ + +- `Website `_ +- `Mailing List `_: +- `Issue Tracker `_ +- IRC: #clangbuiltlinux on chat.freenode.net +- `Telegram `_: @ClangBuiltLinux +- `Wiki `_ +- `Beginner Bugs `_ + +Getting LLVM +------------- + +- http://releases.llvm.org/download.html +- https://github.com/llvm/llvm-project +- https://llvm.org/docs/GettingStarted.html +- https://llvm.org/docs/CMake.html +- https://apt.llvm.org/ +- https://www.archlinux.org/packages/extra/x86_64/llvm/ +- https://github.com/ClangBuiltLinux/tc-build +- https://github.com/ClangBuiltLinux/linux/wiki/Building-Clang-from-source +- https://android.googlesource.com/platform/prebuilts/clang/host/linux-x86/ -- cgit v1.2.3 From 2ba06cd8565b5b3dee33a9ca24cf86a906d051d1 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 28 Feb 2020 18:37:30 -0600 Subject: kbuild: Always validate DT binding examples Most folks only run dt_binding_check on the single schema they care about by setting DT_SCHEMA_FILES. That means example is only checked against that one schema which is not always sufficient. Let's address this by splitting processed-schema.yaml into 2 files: one that's always all schemas for the examples and one that's just the schema in DT_SCHEMA_FILES for dtbs. Co-developed-by: Masahiro Yamada Signed-off-by: Rob Herring Signed-off-by: Masahiro Yamada --- Documentation/devicetree/bindings/.gitignore | 2 +- Documentation/devicetree/bindings/Makefile | 22 ++++++++++++++-------- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/.gitignore b/Documentation/devicetree/bindings/.gitignore index ef82fcfcccab..57afa1533a5f 100644 --- a/Documentation/devicetree/bindings/.gitignore +++ b/Documentation/devicetree/bindings/.gitignore @@ -1,2 +1,2 @@ *.example.dts -processed-schema.yaml +processed-schema*.yaml diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile index 646cb3525373..7c40d5ba1b51 100644 --- a/Documentation/devicetree/bindings/Makefile +++ b/Documentation/devicetree/bindings/Makefile @@ -2,7 +2,6 @@ DT_DOC_CHECKER ?= dt-doc-validate DT_EXTRACT_EX ?= dt-extract-example DT_MK_SCHEMA ?= dt-mk-schema -DT_MK_SCHEMA_FLAGS := $(if $(DT_SCHEMA_FILES), -u) quiet_cmd_chk_binding = CHKDT $(patsubst $(srctree)/%,%,$<) cmd_chk_binding = $(DT_DOC_CHECKER) -u $(srctree)/$(src) $< ; \ @@ -11,26 +10,33 @@ quiet_cmd_chk_binding = CHKDT $(patsubst $(srctree)/%,%,$<) $(obj)/%.example.dts: $(src)/%.yaml FORCE $(call if_changed,chk_binding) -DT_TMP_SCHEMA := processed-schema.yaml +# Use full schemas when checking %.example.dts +DT_TMP_SCHEMA := $(obj)/processed-schema-examples.yaml quiet_cmd_mk_schema = SCHEMA $@ cmd_mk_schema = $(DT_MK_SCHEMA) $(DT_MK_SCHEMA_FLAGS) -o $@ $(real-prereqs) -DT_DOCS = $(shell \ +DT_DOCS = $(addprefix $(src)/, \ + $(shell \ cd $(srctree)/$(src) && \ find * \( -name '*.yaml' ! \ - -name $(DT_TMP_SCHEMA) ! \ + -name 'processed-schema*' ! \ -name '*.example.dt.yaml' \) \ - ) + )) -DT_SCHEMA_FILES ?= $(addprefix $(src)/,$(DT_DOCS)) +DT_SCHEMA_FILES ?= $(DT_DOCS) ifeq ($(CHECK_DTBS),) extra-y += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES)) extra-y += $(patsubst $(src)/%.yaml,%.example.dt.yaml, $(DT_SCHEMA_FILES)) +extra-y += processed-schema-examples.yaml + +$(obj)/processed-schema-examples.yaml: $(DT_DOCS) FORCE + $(call if_changed,mk_schema) endif -$(obj)/$(DT_TMP_SCHEMA): $(DT_SCHEMA_FILES) FORCE +$(obj)/processed-schema.yaml: DT_MK_SCHEMA_FLAGS := -u +$(obj)/processed-schema.yaml: $(DT_SCHEMA_FILES) FORCE $(call if_changed,mk_schema) -extra-y += $(DT_TMP_SCHEMA) +extra-y += processed-schema.yaml -- cgit v1.2.3 From 65220630bb1766227c29a7774b95af3916f1564b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 28 Feb 2020 18:37:31 -0600 Subject: kbuild: Build DT binding examples with dtc warnings enabled Now that we have a separate rule for DT binding examples, we can customize the dtc options. Let's adjust the dtc warnings to me more strict by default so the examples get cleaned up as they get converted to schema. Leaving 'avoid_unnecessary_addr_size' and 'graph_child_address' warnings disabled as examples tend to be incomplete and they generates a lot of warnings. Co-developed-by: Masahiro Yamada Signed-off-by: Rob Herring Signed-off-by: Masahiro Yamada --- Documentation/devicetree/bindings/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile index 7c40d5ba1b51..b62c0470f122 100644 --- a/Documentation/devicetree/bindings/Makefile +++ b/Documentation/devicetree/bindings/Makefile @@ -31,6 +31,10 @@ extra-y += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES)) extra-y += $(patsubst $(src)/%.yaml,%.example.dt.yaml, $(DT_SCHEMA_FILES)) extra-y += processed-schema-examples.yaml +override DTC_FLAGS := \ + -Wno-avoid_unnecessary_addr_size \ + -Wno-graph_child_address + $(obj)/processed-schema-examples.yaml: $(DT_DOCS) FORCE $(call if_changed,mk_schema) endif -- cgit v1.2.3 From c77af39bdb8b0f0605b484428a65ec0d97ba18dc Mon Sep 17 00:00:00 2001 From: Odelu Kukatla Date: Tue, 3 Mar 2020 19:02:52 +0200 Subject: dt-bindings: interconnect: Add Qualcomm SC7180 DT bindings The Qualcomm SC7180 platform has several bus fabrics that could be controlled and tuned dynamically according to the bandwidth demand. Signed-off-by: Odelu Kukatla Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/1583241493-21212-2-git-send-email-okukatla@codeaurora.org Signed-off-by: Georgi Djakov --- .../bindings/interconnect/qcom,sc7180.yaml | 85 ++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,sc7180.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sc7180.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sc7180.yaml new file mode 100644 index 000000000000..50f78f87f3fb --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,sc7180.yaml @@ -0,0 +1,85 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,sc7180.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm SC7180 Network-On-Chip Interconnect + +maintainers: + - Odelu Kukatla + +description: | + SC7180 interconnect providers support system bandwidth requirements through + RPMh hardware accelerators known as Bus Clock Manager (BCM). The provider is + able to communicate with the BCM through the Resource State Coordinator (RSC) + associated with each execution environment. Provider nodes must point to at + least one RPMh device child node pertaining to their RSC and each provider + can map to multiple RPMh resources. + +properties: + reg: + maxItems: 1 + + compatible: + enum: + - qcom,sc7180-aggre1-noc + - qcom,sc7180-aggre2-noc + - qcom,sc7180-camnoc-virt + - qcom,sc7180-compute-noc + - qcom,sc7180-config-noc + - qcom,sc7180-dc-noc + - qcom,sc7180-gem-noc + - qcom,sc7180-ipa-virt + - qcom,sc7180-mc-virt + - qcom,sc7180-mmss-noc + - qcom,sc7180-npu-noc + - qcom,sc7180-qup-virt + - qcom,sc7180-system-noc + + '#interconnect-cells': + const: 1 + + qcom,bcm-voters: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: | + List of phandles to qcom,bcm-voter nodes that are required by + this interconnect to send RPMh commands. + + qcom,bcm-voter-names: + $ref: /schemas/types.yaml#/definitions/string-array + description: | + Names for each of the qcom,bcm-voters specified. + +required: + - compatible + - reg + - '#interconnect-cells' + - qcom,bcm-voters + +additionalProperties: false + +examples: + - | + #include + + config_noc: interconnect@1500000 { + compatible = "qcom,sc7180-config-noc"; + reg = <0 0x01500000 0 0x28000>; + #interconnect-cells = <1>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; + + system_noc: interconnect@1620000 { + compatible = "qcom,sc7180-system-noc"; + reg = <0 0x01620000 0 0x17080>; + #interconnect-cells = <1>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; + + mmss_noc: interconnect@1740000 { + compatible = "qcom,sc7180-mmss-noc"; + reg = <0 0x01740000 0 0x1c100>; + #interconnect-cells = <1>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; -- cgit v1.2.3 From 7a077f7fdaa4fa641c43a554db2811c62048d7f4 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Tue, 3 Mar 2020 19:02:52 +0200 Subject: dt-bindings: interconnect: Add OSM L3 DT bindings Add bindings for Operating State Manager (OSM) L3 interconnect provider on SDM845 SoCs. Reviewed-by: Rob Herring Signed-off-by: Sibi Sankar Link: https://lore.kernel.org/r/20200227105632.15041-3-sibis@codeaurora.org Signed-off-by: Georgi Djakov --- .../bindings/interconnect/qcom,osm-l3.yaml | 61 ++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml b/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml new file mode 100644 index 000000000000..b4d46a1e9257 --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,osm-l3.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Operating State Manager (OSM) L3 Interconnect Provider + +maintainers: + - Sibi Sankar + +description: + L3 cache bandwidth requirements on Qualcomm SoCs is serviced by the OSM. + The OSM L3 interconnect provider aggregates the L3 bandwidth requests + from CPU/GPU and relays it to the OSM. + +properties: + compatible: + enum: + - qcom,sdm845-osm-l3 + + reg: + maxItems: 1 + + clocks: + items: + - description: xo clock + - description: alternate clock + + clock-names: + items: + - const: xo + - const: alternate + + '#interconnect-cells': + const: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - '#interconnect-cells' + +additionalProperties: false + +examples: + - | + + #define GPLL0 165 + #define RPMH_CXO_CLK 0 + + osm_l3: interconnect@17d41000 { + compatible = "qcom,sdm845-osm-l3"; + reg = <0x17d41000 0x1400>; + + clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>; + clock-names = "xo", "alternate"; + + #interconnect-cells = <1>; + }; -- cgit v1.2.3 From ff3edec1c3fd9bec103f0055406f9732b4919ea8 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Tue, 3 Mar 2020 19:02:52 +0200 Subject: dt-bindings: interconnect: Add OSM L3 DT binding on SC7180 Add OSM L3 interconnect provider binding on SC7180 SoCs. Acked-by: Rob Herring Signed-off-by: Sibi Sankar Link: https://lore.kernel.org/r/20200227105632.15041-5-sibis@codeaurora.org Signed-off-by: Georgi Djakov --- Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml b/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml index b4d46a1e9257..91f70c9067d1 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml @@ -17,6 +17,7 @@ description: properties: compatible: enum: + - qcom,sc7180-osm-l3 - qcom,sdm845-osm-l3 reg: -- cgit v1.2.3 From b396bfdebffcc05a855137775e38f4652cbca454 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 12 Feb 2020 12:21:03 -0500 Subject: tracing: Have hwlat ts be first instance and record count of instances The hwlat tracer runs a loop of width time during a given window. It then reports the max latency over a given threshold and records a timestamp. But this timestamp is the time after the width has finished, and not the time it actually triggered. Record the actual time when the latency was greater than the threshold as well as the number of times it was greater in a given width per window. Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/ftrace.rst | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'Documentation') diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index ff658e27d25b..99a0890e20ec 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -2126,6 +2126,8 @@ periodically make a CPU constantly busy with interrupts disabled. # cat trace # tracer: hwlat # + # entries-in-buffer/entries-written: 13/13 #P:8 + # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq @@ -2133,12 +2135,18 @@ periodically make a CPU constantly busy with interrupts disabled. # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | - <...>-3638 [001] d... 19452.055471: #1 inner/outer(us): 12/14 ts:1499801089.066141940 - <...>-3638 [003] d... 19454.071354: #2 inner/outer(us): 11/9 ts:1499801091.082164365 - <...>-3638 [002] dn.. 19461.126852: #3 inner/outer(us): 12/9 ts:1499801098.138150062 - <...>-3638 [001] d... 19488.340960: #4 inner/outer(us): 8/12 ts:1499801125.354139633 - <...>-3638 [003] d... 19494.388553: #5 inner/outer(us): 8/12 ts:1499801131.402150961 - <...>-3638 [003] d... 19501.283419: #6 inner/outer(us): 0/12 ts:1499801138.297435289 nmi-total:4 nmi-count:1 + <...>-1729 [001] d... 678.473449: #1 inner/outer(us): 11/12 ts:1581527483.343962693 count:6 + <...>-1729 [004] d... 689.556542: #2 inner/outer(us): 16/9 ts:1581527494.889008092 count:1 + <...>-1729 [005] d... 714.756290: #3 inner/outer(us): 16/16 ts:1581527519.678961629 count:5 + <...>-1729 [001] d... 718.788247: #4 inner/outer(us): 9/17 ts:1581527523.889012713 count:1 + <...>-1729 [002] d... 719.796341: #5 inner/outer(us): 13/9 ts:1581527524.912872606 count:1 + <...>-1729 [006] d... 844.787091: #6 inner/outer(us): 9/12 ts:1581527649.889048502 count:2 + <...>-1729 [003] d... 849.827033: #7 inner/outer(us): 18/9 ts:1581527654.889013793 count:1 + <...>-1729 [007] d... 853.859002: #8 inner/outer(us): 9/12 ts:1581527658.889065736 count:1 + <...>-1729 [001] d... 855.874978: #9 inner/outer(us): 9/11 ts:1581527660.861991877 count:1 + <...>-1729 [001] d... 863.938932: #10 inner/outer(us): 9/11 ts:1581527668.970010500 count:1 nmi-total:7 nmi-count:1 + <...>-1729 [007] d... 878.050780: #11 inner/outer(us): 9/12 ts:1581527683.385002600 count:1 nmi-total:5 nmi-count:1 + <...>-1729 [007] d... 886.114702: #12 inner/outer(us): 9/12 ts:1581527691.385001600 count:1 The above output is somewhat the same in the header. All events will have @@ -2148,7 +2156,7 @@ interrupts disabled 'd'. Under the FUNCTION title there is: This is the count of events recorded that were greater than the tracing_threshold (See below). - inner/outer(us): 12/14 + inner/outer(us): 11/11 This shows two numbers as "inner latency" and "outer latency". The test runs in a loop checking a timestamp twice. The latency detected within @@ -2156,11 +2164,15 @@ interrupts disabled 'd'. Under the FUNCTION title there is: after the previous timestamp and the next timestamp in the loop is the "outer latency". - ts:1499801089.066141940 + ts:1581527483.343962693 + + The absolute timestamp that the first latency was recorded in the window. + + count:6 - The absolute timestamp that the event happened. + The number of times a latency was detected during the window. - nmi-total:4 nmi-count:1 + nmi-total:7 nmi-count:1 On architectures that support it, if an NMI comes in during the test, the time spent in NMI is reported in "nmi-total" (in -- cgit v1.2.3 From c04d102ba56ee305791edab08958fa35f39a3f8b Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 3 Mar 2020 13:29:42 +0000 Subject: doc: sfp-phylink: correct code indentation Using vim to edit the phylink documentation reveals some mistakes due to the "invisible" pythonesque white space indentation that can't be seen with other editors. Fix it. Signed-off-by: Russell King Signed-off-by: David S. Miller --- Documentation/networking/sfp-phylink.rst | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/sfp-phylink.rst b/Documentation/networking/sfp-phylink.rst index 8d7af28cd835..5aec7c8857d0 100644 --- a/Documentation/networking/sfp-phylink.rst +++ b/Documentation/networking/sfp-phylink.rst @@ -138,27 +138,27 @@ this documentation. .. code-block:: c - static int foo_ethtool_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) - { - struct foo_priv *priv = netdev_priv(dev); - - return phylink_ethtool_ksettings_set(priv->phylink, cmd); - } - - static int foo_ethtool_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) - { - struct foo_priv *priv = netdev_priv(dev); + static int foo_ethtool_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) + { + struct foo_priv *priv = netdev_priv(dev); + + return phylink_ethtool_ksettings_set(priv->phylink, cmd); + } - return phylink_ethtool_ksettings_get(priv->phylink, cmd); - } + static int foo_ethtool_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) + { + struct foo_priv *priv = netdev_priv(dev); + + return phylink_ethtool_ksettings_get(priv->phylink, cmd); + } -7. Replace the call to: +7. Replace the call to:: phy_dev = of_phy_connect(dev, node, link_func, flags, phy_interface); - and associated code with a call to: + and associated code with a call to:: err = phylink_of_phy_connect(priv->phylink, node, flags); -- cgit v1.2.3 From 75f81a7ffe4d45b97a14f6d8075bacb8323ac10e Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Wed, 26 Feb 2020 11:57:58 -0800 Subject: usb: typec: Add sysfs node to show cc orientation Export Type-C orientation information when available. - "normal": CC1 orientation - "reverse": CC2 orientation - "unknown": Orientation cannot be determined. Signed-off-by: Badhri Jagan Sridharan Reviewed-by: Heikki Krogerus Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20200226195758.150477-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-class-typec | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-class-typec b/Documentation/ABI/testing/sysfs-class-typec index 0c2eb26fdc06..b834671522d6 100644 --- a/Documentation/ABI/testing/sysfs-class-typec +++ b/Documentation/ABI/testing/sysfs-class-typec @@ -108,6 +108,15 @@ Contact: Heikki Krogerus Description: Revision number of the supported USB Type-C specification. +What: /sys/class/typec//orientation +Date: February 2020 +Contact: Badhri Jagan Sridharan +Description: + Indicates the active orientation of the Type-C connector. + Valid values: + - "normal": CC1 orientation + - "reverse": CC2 orientation + - "unknown": Orientation cannot be determined. USB Type-C partner devices (eg. /sys/class/typec/port0-partner/) -- cgit v1.2.3 From c111566bea7ccd8a05e2c56f1fb3cbb6f4b7b441 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 25 Feb 2020 11:31:02 +0200 Subject: PM: runtime: Add pm_runtime_get_if_active() pm_runtime_get_if_in_use() bumps up the PM-runtime usage count if it is not equal to zero and the device's PM-runtime status is 'active'. This works for drivers that do not use autoidle, but for those that do, the function returns zero even when the device is active. In order to maintain sane device state while the device is powered on in the hope that it'll be needed, pm_runtime_get_if_active(dev, true) returns a positive value if the device's PM-runtime status is 'active' when it is called, in which case it also increments the device's usage count. If the second argument of pm_runtime_get_if_active() is 'false', the function behaves just like pm_runtime_get_if_in_use(), so redefine the latter as a wrapper around the former. Signed-off-by: Sakari Ailus [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.rst | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst index ab8406c84254..0553008b6279 100644 --- a/Documentation/power/runtime_pm.rst +++ b/Documentation/power/runtime_pm.rst @@ -382,6 +382,12 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: nonzero, increment the counter and return 1; otherwise return 0 without changing the counter + `int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count);` + - return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the + runtime PM status is RPM_ACTIVE, and either ign_usage_count is true + or the device's usage_count is non-zero, increment the counter and + return 1; otherwise return 0 without changing the counter + `void pm_runtime_put_noidle(struct device *dev);` - decrement the device's usage counter -- cgit v1.2.3 From 636a0e4b079483437c1b673eda69b7e4f12b00f9 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Fri, 24 Jan 2020 00:29:37 +0100 Subject: dt-bindings: Add AXG PCIE PHY bindings Add documentation for PCIE PHYs found in AXG SoCs. Signed-off-by: Remi Pommarel Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../bindings/phy/amlogic,meson-axg-pcie.yaml | 52 ++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 Documentation/devicetree/bindings/phy/amlogic,meson-axg-pcie.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-axg-pcie.yaml b/Documentation/devicetree/bindings/phy/amlogic,meson-axg-pcie.yaml new file mode 100644 index 000000000000..086478aec946 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/amlogic,meson-axg-pcie.yaml @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/phy/amlogic,meson-axg-pcie.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Amlogic AXG PCIE PHY + +maintainers: + - Remi Pommarel + +properties: + compatible: + const: amlogic,axg-pcie-phy + + reg: + maxItems: 1 + + resets: + maxItems: 1 + + phys: + maxItems: 1 + + phy-names: + const: analog + + "#phy-cells": + const: 0 + +required: + - compatible + - reg + - phys + - phy-names + - resets + - "#phy-cells" + +additionalProperties: false + +examples: + - | + #include + #include + pcie_phy: pcie-phy@ff644000 { + compatible = "amlogic,axg-pcie-phy"; + reg = <0x0 0xff644000 0x0 0x1c>; + resets = <&reset RESET_PCIE_PHY>; + phys = <&mipi_analog_phy PHY_TYPE_PCIE>; + phy-names = "analog"; + #phy-cells = <0>; + }; -- cgit v1.2.3 From b09b48b3d09413f557d3006f5618d42e75192dd0 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Fri, 24 Jan 2020 00:29:38 +0100 Subject: dt-bindings: Add AXG shared MIPI/PCIE analog PHY bindings Add documentation for the shared MIPI/PCIE analog PHY found in AXG SoCs. Signed-off-by: Remi Pommarel Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../phy/amlogic,meson-axg-mipi-pcie-analog.yaml | 35 ++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 Documentation/devicetree/bindings/phy/amlogic,meson-axg-mipi-pcie-analog.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-axg-mipi-pcie-analog.yaml b/Documentation/devicetree/bindings/phy/amlogic,meson-axg-mipi-pcie-analog.yaml new file mode 100644 index 000000000000..88683db6cf81 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/amlogic,meson-axg-mipi-pcie-analog.yaml @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/phy/amlogic,meson-axg-mipi-pcie-analog.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: Amlogic AXG shared MIPI/PCIE analog PHY + +maintainers: + - Remi Pommarel + +properties: + compatible: + const: amlogic,axg-mipi-pcie-analog-phy + + reg: + maxItems: 1 + + "#phy-cells": + const: 1 + +required: + - compatible + - reg + - "#phy-cells" + +additionalProperties: false + +examples: + - | + mpphy: phy@0 { + compatible = "amlogic,axg-mipi-pcie-analog-phy"; + reg = <0x0 0x0 0x0 0xc>; + #phy-cells = <1>; + }; -- cgit v1.2.3 From 6e5f77031cc92397aba2d03c9f82bfc511b83467 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Fri, 24 Jan 2020 00:29:39 +0100 Subject: dt-bindings: PCI: meson: Update PCIE bindings documentation Now that a new PHYs has been introduced for AXG SoC family, update dt bindings documentation. Please note that this breaks backward compatibility but as not a single devicetree uses that yet that seems ok. Signed-off-by: Remi Pommarel Signed-off-by: Lorenzo Pieralisi Reviewed-by: Rob Herring --- .../devicetree/bindings/pci/amlogic,meson-pcie.txt | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt b/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt index 84fdc422792e..b6acbe694ffb 100644 --- a/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt +++ b/Documentation/devicetree/bindings/pci/amlogic,meson-pcie.txt @@ -18,7 +18,6 @@ Required properties: - reg-names: Must be - "elbi" External local bus interface registers - "cfg" Meson specific registers - - "phy" Meson PCIE PHY registers for AXG SoC Family - "config" PCIe configuration space - reset-gpios: The GPIO to generate PCIe PERST# assert and deassert signal. - clocks: Must contain an entry for each entry in clock-names. @@ -26,13 +25,13 @@ Required properties: - "pclk" PCIe GEN 100M PLL clock - "port" PCIe_x(A or B) RC clock gate - "general" PCIe Phy clock - - "mipi" PCIe_x(A or B) 100M ref clock gate for AXG SoC Family - resets: phandle to the reset lines. -- reset-names: must contain "phy" "port" and "apb" - - "phy" Share PHY reset for AXG SoC Family +- reset-names: must contain "port" and "apb" - "port" Port A or B reset - "apb" Share APB reset -- phys: should contain a phandle to the shared phy for G12A SoC Family +- phys: should contain a phandle to the PCIE phy +- phy-names: must contain "pcie" + - device_type: should be "pci". As specified in designware-pcie.txt @@ -43,9 +42,8 @@ Example configuration: compatible = "amlogic,axg-pcie", "snps,dw-pcie"; reg = <0x0 0xf9800000 0x0 0x400000 0x0 0xff646000 0x0 0x2000 - 0x0 0xff644000 0x0 0x2000 0x0 0xf9f00000 0x0 0x100000>; - reg-names = "elbi", "cfg", "phy", "config"; + reg-names = "elbi", "cfg", "config"; reset-gpios = <&gpio GPIOX_19 GPIO_ACTIVE_HIGH>; interrupts = ; #interrupt-cells = <1>; @@ -58,17 +56,15 @@ Example configuration: ranges = <0x82000000 0 0 0x0 0xf9c00000 0 0x00300000>; clocks = <&clkc CLKID_USB - &clkc CLKID_MIPI_ENABLE &clkc CLKID_PCIE_A &clkc CLKID_PCIE_CML_EN0>; clock-names = "general", - "mipi", "pclk", "port"; - resets = <&reset RESET_PCIE_PHY>, - <&reset RESET_PCIE_A>, + resets = <&reset RESET_PCIE_A>, <&reset RESET_PCIE_APB>; - reset-names = "phy", - "port", + reset-names = "port", "apb"; + phys = <&pcie_phy>; + phy-names = "pcie"; }; -- cgit v1.2.3 From e177440a1bba09d3ff0b2f992d03dbab2640746f Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Wed, 22 Jan 2020 14:40:24 +0100 Subject: regulator: vqmmc-ipq4019-regulator: add binding document This patch adds bindings for the Qualcomm IPQ4019 VQMMC SD LDO driver. Signed-off-by: Robert Marko Message-Id: <20200122134023.1467806-1-robert.marko@sartura.hr> Signed-off-by: Mark Brown --- .../regulator/vqmmc-ipq4019-regulator.yaml | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 Documentation/devicetree/bindings/regulator/vqmmc-ipq4019-regulator.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/regulator/vqmmc-ipq4019-regulator.yaml b/Documentation/devicetree/bindings/regulator/vqmmc-ipq4019-regulator.yaml new file mode 100644 index 000000000000..d1a79d2ffa1e --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/vqmmc-ipq4019-regulator.yaml @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/regulator/vqmmc-ipq4019-regulator.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm IPQ4019 VQMMC SD LDO regulator + +maintainers: + - Robert Marko + +description: | + Qualcomm IPQ4019 SoC-s feature a built a build SD/EMMC controller, + in order to support both 1.8 and 3V I/O voltage levels an LDO + controller is also embedded. + +allOf: + - $ref: "regulator.yaml#" + +properties: + compatible: + const: qcom,vqmmc-ipq4019-regulator + + reg: + maxItems: 1 + +required: + - compatible + - reg + +examples: + - | + regulator@1948000 { + compatible = "qcom,vqmmc-ipq4019-regulator"; + reg = <0x01948000 0x4>; + regulator-name = "vqmmc"; + regulator-min-microvolt = <1500000>; + regulator-max-microvolt = <3000000>; + regulator-always-on; + status = "disabled"; + }; +... -- cgit v1.2.3 From 33fbfb3eaf4eff553bcf82b2c2821324293f577a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 6 Feb 2020 16:02:43 +0100 Subject: dt-bindings: arm: Add Integrator YAML schema This implements the top-level schema for the ARM Integrator platforms. Cc: Sudeep Holla Reviewed-by: Rob Herring Signed-off-by: Linus Walleij --- .../devicetree/bindings/arm/arm,integrator.yaml | 86 ++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/arm,integrator.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/arm,integrator.yaml b/Documentation/devicetree/bindings/arm/arm,integrator.yaml new file mode 100644 index 000000000000..192ded470e32 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/arm,integrator.yaml @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/arm,integrator.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ARM Integrator Boards Device Tree Bindings + +maintainers: + - Linus Walleij + +description: |+ + These were the first ARM platforms officially supported by ARM Ltd. + They are ARMv4, ARMv5 and ARMv6-capable using different core tiles, + so the system is modular and can host a variety of CPU tiles called + "core tiles" and referred to in the device tree as "core modules". + +properties: + $nodename: + const: '/' + compatible: + oneOf: + - description: ARM Integrator Application Platform, this board has a PCI + host and several PCI slots, as well as a number of slots for logical + expansion modules, it is referred to as an "ASIC Development + Motherboard" and is extended with custom FPGA and is intended for + rapid prototyping. See ARM DUI 0098B. This board can physically come + pre-packaged in a PC Tower form factor called Integrator/PP1 or a + special metal fixture called Integrator/PP2, see ARM DUI 0169A. + items: + - const: arm,integrator-ap + - description: ARM Integrator Compact Platform (HBI-0086), this board has + a compact form factor and mainly consists of the bare minimum + peripherals to make use of the core module. See ARM DUI 0159B. + items: + - const: arm,integrator-cp + - description: ARM Integrator Standard Development Board (SDB) Platform, + this board is a PCI-based board conforming to the Microsoft SDB + (HARP) specification. See ARM DUI 0099A. + items: + - const: arm,integrator-sp + + core-module@10000000: + type: object + description: the root node in the Integrator platforms must contain + a core module child node. They are always at physical address + 0x10000000 in all the Integrator variants. + properties: + compatible: + items: + - const: arm,core-module-integrator + - const: syscon + - const: simple-mfd + reg: + maxItems: 1 + + required: + - compatible + - reg + +patternProperties: + "^syscon@[0-9a-f]+$": + description: All Integrator boards must provide a system controller as a + node in the root of the device tree. + type: object + properties: + compatible: + items: + - enum: + - arm,integrator-ap-syscon + - arm,integrator-cp-syscon + - arm,integrator-sp-syscon + - const: syscon + reg: + maxItems: 1 + + required: + - compatible + - reg + + +required: + - compatible + - core-module@10000000 + +... -- cgit v1.2.3 From 4b900070d50d3ba167a169f7699cde3ef9c3f067 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 6 Feb 2020 16:36:25 +0100 Subject: dt-bindings: arm: Add Versatile YAML schema This implements the top-level schema for the ARM Versatile platforms. Cc: Sudeep Holla Reviewed-by: Rob Herring Signed-off-by: Linus Walleij --- .../devicetree/bindings/arm/arm,versatile.yaml | 71 ++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/arm,versatile.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/arm,versatile.yaml b/Documentation/devicetree/bindings/arm/arm,versatile.yaml new file mode 100644 index 000000000000..06efd2a075c9 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/arm,versatile.yaml @@ -0,0 +1,71 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/arm,versatile.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ARM Versatile Boards Device Tree Bindings + +maintainers: + - Linus Walleij + +description: |+ + The ARM Versatile boards are two variants of ARM926EJ-S evaluation boards + with various pluggable interface boards, in essence the Versatile PB version + is a superset of the Versatile AB version. + +properties: + $nodename: + const: '/' + compatible: + oneOf: + - description: The ARM Versatile Application Baseboard (HBI-0118) is an + evaluation board specifically for the ARM926EJ-S. It can be connected + to an IB1 interface board for a touchscreen-type use case or an IB2 + for a candybar phone-type use case. See ARM DUI 0225D. + items: + - const: arm,versatile-ab + - description: The ARM Versatile Platform Baseboard (HBI-0117) is an + extension of the Versatile Application Baseboard that includes a + PCI host controller. Like the sibling board, it is done specifically + for ARM926EJ-S. See ARM DUI 0224B. + items: + - const: arm,versatile-pb + + core-module@10000000: + type: object + description: the root node in the Versatile platforms must contain + a core module child node. They are always at physical address + 0x10000000 in all the Versatile variants. + properties: + compatible: + items: + - const: arm,core-module-versatile + - const: syscon + - const: simple-mfd + reg: + maxItems: 1 + + required: + - compatible + - reg + +patternProperties: + "^syscon@[0-9a-f]+$": + type: object + description: When fitted with the IB2 Interface Board, the Versatile + AB will present an optional system controller node which controls the + extra peripherals on the interface board. + properties: + compatible: + contains: + const: arm,versatile-ib2-syscon + required: + - compatible + - reg + +required: + - compatible + - core-module@10000000 + +... -- cgit v1.2.3 From 7db625b9fa758f6b00c5b63ad16f8b2caaa8d1b7 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 6 Feb 2020 22:03:11 +0100 Subject: dt-bindings: arm: Add RealView YAML schema This implements the top-level schema for the ARM RealView platforms. Cc: Sudeep Holla Reviewed-by: Rob Herring Signed-off-by: Linus Walleij --- .../devicetree/bindings/arm/arm,realview.yaml | 123 +++++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/arm,realview.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/arm,realview.yaml b/Documentation/devicetree/bindings/arm/arm,realview.yaml new file mode 100644 index 000000000000..d6e85d198afe --- /dev/null +++ b/Documentation/devicetree/bindings/arm/arm,realview.yaml @@ -0,0 +1,123 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/arm,realview.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ARM RealView Boards Device Tree Bindings + +maintainers: + - Linus Walleij + +description: |+ + The ARM RealView series of reference designs were built to explore the ARM + 11, Cortex A-8 and Cortex A-9 CPUs. This included new features compared to + the earlier CPUs such as TrustZone and multicore (MPCore). + +properties: + $nodename: + const: '/' + compatible: + oneOf: + - description: ARM RealView Emulation Baseboard (HBI-0140) was created + as a generic platform to test different FPGA designs, and has + pluggable CPU modules, see ARM DUI 0303E. + items: + - const: arm,realview-eb + - description: ARM RealView Platform Baseboard for ARM1176JZF-S + (HBI-0147) was created as a development board to test ARM TrustZone, + CoreSight and Intelligent Energy Management (IEM) see ARM DUI 0425F. + items: + - const: arm,realview-pb1176 + - description: ARM RealView Platform Baseboard for ARM 11 MPCore + (HBI-0159, HBI-0175 and HBI-0176) was created to showcase + multiprocessing with ARM11 using MPCore using symmetric + multiprocessing (SMP). See ARM DUI 0351E. + items: + - const: arm,realview-pb11mp + - description: ARM RealView Platform Baseboard for Cortex-A8 (HBI-0178, + HBI-0176 and HBI-0175) was the first reference platform for the + Cortex CPU family, including a Cortex-A8 test chip. + items: + - const: arm,realview-pba8 + - description: ARM RealView Platform Baseboard Explore for Cortex-A9 + (HBI-0182 and HBI-0183) was the reference platform for the Cortex-A9 + CPU. + items: + - const: arm,realview-pbx + + soc: + description: All RealView boards must provide a soc node in the root of the + device tree, representing the System-on-Chip since these test chips are + rather complex. + type: object + properties: + compatible: + oneOf: + - items: + - const: arm,realview-eb-soc + - const: simple-bus + - items: + - const: arm,realview-pb1176-soc + - const: simple-bus + - items: + - const: arm,realview-pb11mp-soc + - const: simple-bus + - items: + - const: arm,realview-pba8-soc + - const: simple-bus + - items: + - const: arm,realview-pbx-soc + - const: simple-bus + + patternProperties: + "^.*syscon@[0-9a-f]+$": + type: object + description: All RealView boards must provide a syscon system controller + node inside the soc node. + properties: + compatible: + oneOf: + - items: + - const: arm,realview-eb11mp-revb-syscon + - const: arm,realview-eb-syscon + - const: syscon + - const: simple-mfd + - items: + - const: arm,realview-eb11mp-revc-syscon + - const: arm,realview-eb-syscon + - const: syscon + - const: simple-mfd + - items: + - const: arm,realview-eb-syscon + - const: syscon + - const: simple-mfd + - items: + - const: arm,realview-pb1176-syscon + - const: syscon + - const: simple-mfd + - items: + - const: arm,realview-pb11mp-syscon + - const: syscon + - const: simple-mfd + - items: + - const: arm,realview-pba8-syscon + - const: syscon + - const: simple-mfd + - items: + - const: arm,realview-pbx-syscon + - const: syscon + - const: simple-mfd + + required: + - compatible + - reg + + required: + - compatible + +required: + - compatible + - soc + +... -- cgit v1.2.3 From 4fb00d9066c13a3cb755d9b05e8de007024ed1a4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 10 Feb 2020 10:17:09 +0100 Subject: dt-bindings: arm: Add Versatile Express and Juno YAML schema This implements the top-level schema for the ARM Versatile Express and Juno platforms. Acked-by: Sudeep Holla Reviewed-by: Rob Herring Signed-off-by: Linus Walleij --- .../devicetree/bindings/arm/arm,vexpress-juno.yaml | 223 +++++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml b/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml new file mode 100644 index 000000000000..8c06a73f716c --- /dev/null +++ b/Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml @@ -0,0 +1,223 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/arm,vexpress-juno.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: ARM Versatile Express and Juno Boards Device Tree Bindings + +maintainers: + - Sudeep Holla + - Linus Walleij + +description: |+ + ARM's Versatile Express platform were built as reference designs for exploring + multicore Cortex-A class systems. The Versatile Express family contains both + 32 bit (Aarch32) and 64 bit (Aarch64) systems. + + The board consist of a motherboard and one or more daughterboards (tiles). The + motherboard provides a set of peripherals. Processor and RAM "live" on the + tiles. + + The motherboard and each core tile should be described by a separate Device + Tree source file, with the tile's description including the motherboard file + using an include directive. As the motherboard can be initialized in one of + two different configurations ("memory maps"), care must be taken to include + the correct one. + + When a new generation of boards were introduced under the name "Juno", these + shared to many common characteristics with the Versatile Express that the + "arm,vexpress" compatible was retained in the root node, and these are + included in this binding schema as well. + + The root node indicates the CPU SoC on the core tile, and this + is a daughterboard to the main motherboard. The name used in the compatible + string shall match the name given in the core tile's technical reference + manual, followed by "arm,vexpress" as an additional compatible value. If + further subvariants are released of the core tile, even more fine-granular + compatible strings with up to three compatible strings are used. + +properties: + $nodename: + const: '/' + compatible: + oneOf: + - description: CoreTile Express A9x4 (V2P-CA9) has 4 Cortex A9 CPU cores + in MPCore configuration in a test chip on the core tile. See ARM + DUI 0448I. This was the first Versatile Express platform. + items: + - const: arm,vexpress,v2p-ca9 + - const: arm,vexpress + - description: CoreTile Express A5x2 (V2P-CA5s) has 2 Cortex A5 CPU cores + in a test chip on the core tile. It is intended to evaluate NEON, FPU + and Jazelle support in the Cortex A5 family. See ARM DUI 0541C. + items: + - const: arm,vexpress,v2p-ca5s + - const: arm,vexpress + - description: Coretile Express A15x2 (V2P-CA15) has 2 Cortex A15 CPU + cores in a MPCore configuration in a test chip on the core tile. See + ARM DUI 0604F. + items: + - const: arm,vexpress,v2p-ca15 + - const: arm,vexpress + - description: CoreTile Express A15x4 (V2P-CA15, HBI-0237A) has 4 Cortex + A15 CPU cores in a test chip on the core tile. This is the first test + chip called "TC1". + items: + - const: arm,vexpress,v2p-ca15,tc1 + - const: arm,vexpress,v2p-ca15 + - const: arm,vexpress + - description: Coretile Express A15x2 A7x3 (V2P-CA15_A7) has 2 Cortex A15 + CPU cores and 3 Cortex A7 cores in a big.LITTLE MPCore configuration + in a test chip on the core tile. See ARM DDI 0503I. + items: + - const: arm,vexpress,v2p-ca15_a7 + - const: arm,vexpress + - description: LogicTile Express 20MG (V2F-1XV7) has 2 Cortex A53 CPU + cores in a test chip on the core tile. See ARM DDI 0498D. + items: + - const: arm,vexpress,v2f-1xv7,ca53x2 + - const: arm,vexpress,v2f-1xv7 + - const: arm,vexpress + - description: Arm Versatile Express Juno "r0" (the first Juno board, + V2M-Juno) was introduced as a vehicle for evaluating big.LITTLE on + AArch64 CPU cores. It has 2 Cortex A57 CPU cores and 4 Cortex A53 + cores in a big.LITTLE configuration. It also features the MALI T624 + GPU. See ARM document 100113_0000_07_en. + items: + - const: arm,juno + - const: arm,vexpress + - description: Arm Versatile Express Juno r1 Development Platform + (V2M-Juno r1) was introduced mainly aimed at development of PCIe + based systems. Juno r1 also has support for AXI masters placed on + the TLX connectors to join the coherency domain. Otherwise it is the + same configuration as Juno r0. See ARM document 100122_0100_06_en. + items: + - const: arm,juno-r1 + - const: arm,juno + - const: arm,vexpress + - description: Arm Versatile Express Juno r2 Development Platform + (V2M-Juno r2). It has the same feature set as Juno r0 and r1. See + ARM document 100114_0200_04_en. + items: + - const: arm,juno-r2 + - const: arm,juno + - const: arm,vexpress + - description: Arm AEMv8a Versatile Express Real-Time System Model + (VE RTSM) is a programmers view of the Versatile Express with Arm + v8A hardware. See ARM DUI 0575D. + items: + - const: arm,rtsm_ve,aemv8a + - const: arm,vexpress + - description: Arm FVP (Fixed Virtual Platform) base model revision C + See ARM Document 100964_1190_00_en. + items: + - const: arm,fvp-base-revc + - const: arm,vexpress + - description: Arm Foundation model for Aarch64 + items: + - const: arm,foundation-aarch64 + - const: arm,vexpress + + arm,hbi: + $ref: '/schemas/types.yaml#/definitions/uint32' + description: This indicates the ARM HBI (Hardware Board ID), this is + ARM's unique board model ID, visible on the PCB's silkscreen. + + arm,vexpress,site: + description: As Versatile Express can be configured in number of physically + different setups, the device tree should describe platform topology. + For this reason the root node and main motherboard node must define this + property, describing the physical location of the children nodes. + 0 means motherboard site, while 1 and 2 are daughterboard sites, and + 0xf means "sisterboard" which is the site containing the main CPU tile. + allOf: + - $ref: '/schemas/types.yaml#/definitions/uint32' + - minimum: 0 + maximum: 15 + + arm,vexpress,position: + description: When daughterboards are stacked on one site, their position + in the stack be be described this attribute. + allOf: + - $ref: '/schemas/types.yaml#/definitions/uint32' + - minimum: 0 + maximum: 3 + + arm,vexpress,dcc: + description: When describing tiles consisting of more than one DCC, its + number can be specified with this attribute. + allOf: + - $ref: '/schemas/types.yaml#/definitions/uint32' + - minimum: 0 + maximum: 3 + +patternProperties: + "^bus@[0-9a-f]+$": + description: Static Memory Bus (SMB) node, if this exists it describes + the connection between the motherboard and any tiles. Sometimes the + compatible is placed directly under this node, sometimes it is placed + in a subnode named "motherboard". Sometimes the compatible includes + "arm,vexpress,v2?-p1" sometimes (on software models) is is just + "simple-bus". If the compatible is placed in the "motherboard" node, + it is stricter and always has two compatibles. + type: object + allOf: + - $ref: '/schemas/simple-bus.yaml' + + properties: + compatible: + oneOf: + - items: + - enum: + - arm,vexpress,v2m-p1 + - arm,vexpress,v2p-p1 + - const: simple-bus + - const: simple-bus + motherboard: + type: object + description: The motherboard description provides a single "motherboard" + node using 2 address cells corresponding to the Static Memory Bus + used between the motherboard and the tile. The first cell defines the + Chip Select (CS) line number, the second cell address offset within + the CS. All interrupt lines between the motherboard and the tile + are active high and are described using single cell. + properties: + "#address-cells": + const: 2 + "#size-cells": + const: 1 + compatible: + items: + - enum: + - arm,vexpress,v2m-p1 + - arm,vexpress,v2p-p1 + - const: simple-bus + arm,v2m-memory-map: + description: This describes the memory map type. + allOf: + - $ref: '/schemas/types.yaml#/definitions/string' + - enum: + - rs1 + - rs2 + required: + - compatible + required: + - compatible + +allOf: + - if: + properties: + compatible: + contains: + enum: + - arm,vexpress,v2p-ca9 + - arm,vexpress,v2p-ca5s + - arm,vexpress,v2p-ca15 + - arm,vexpress,v2p-ca15_a7 + - arm,vexpress,v2f-1xv7,ca53x2 + then: + required: + - arm,hbi + +... -- cgit v1.2.3 From 2d483550b6d21de8af60e67e728fa118a537b07d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 10 Feb 2020 10:21:31 +0100 Subject: dt-bindings: arm: Drop the non-YAML bindings We created new bindings for the ARM Board using YAML so delete the old human-parseable-only bindings. Cc: Sudeep Holla Reviewed-by: Rob Herring Signed-off-by: Linus Walleij --- Documentation/devicetree/bindings/arm/arm-boards | 237 --------------------- Documentation/devicetree/bindings/arm/vexpress.txt | 229 -------------------- 2 files changed, 466 deletions(-) delete mode 100644 Documentation/devicetree/bindings/arm/arm-boards delete mode 100644 Documentation/devicetree/bindings/arm/vexpress.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/arm-boards b/Documentation/devicetree/bindings/arm/arm-boards deleted file mode 100644 index 96b1dad58253..000000000000 --- a/Documentation/devicetree/bindings/arm/arm-boards +++ /dev/null @@ -1,237 +0,0 @@ -ARM Integrator/AP (Application Platform) and Integrator/CP (Compact Platform) ------------------------------------------------------------------------------ -ARM's oldest Linux-supported platform with connectors for different core -tiles of ARMv4, ARMv5 and ARMv6 type. - -Required properties (in root node): - compatible = "arm,integrator-ap"; /* Application Platform */ - compatible = "arm,integrator-cp"; /* Compact Platform */ - -FPGA type interrupt controllers, see the versatile-fpga-irq binding doc. - -Required nodes: - -- core-module: the root node to the Integrator platforms must have - a core-module with regs and the compatible string - "arm,core-module-integrator" -- external-bus-interface: the root node to the Integrator platforms - must have an external bus interface with regs and the - compatible-string "arm,external-bus-interface" - - Required properties for the core module: - - regs: the location and size of the core module registers, one - range of 0x200 bytes. - -- syscon: the root node of the Integrator platforms must have a - system controller node pointing to the control registers, - with the compatible string - "arm,integrator-ap-syscon" - "arm,integrator-cp-syscon" - respectively. - - Required properties for the system controller: - - regs: the location and size of the system controller registers, - one range of 0x100 bytes. - - Required properties for the AP system controller: - - interrupts: the AP syscon node must include the logical module - interrupts, stated in order of module instance , - , ... for the CP system controller this - is not required not of any use. - -/dts-v1/; -/include/ "integrator.dtsi" - -/ { - model = "ARM Integrator/AP"; - compatible = "arm,integrator-ap"; - - core-module@10000000 { - compatible = "arm,core-module-integrator"; - reg = <0x10000000 0x200>; - }; - - ebi@12000000 { - compatible = "arm,external-bus-interface"; - reg = <0x12000000 0x100>; - }; - - syscon { - compatible = "arm,integrator-ap-syscon"; - reg = <0x11000000 0x100>; - interrupt-parent = <&pic>; - /* These are the logic module IRQs */ - interrupts = <9>, <10>, <11>, <12>; - }; -}; - - -ARM Versatile Application and Platform Baseboards -------------------------------------------------- -ARM's development hardware platform with connectors for customizable -core tiles. The hardware configuration of the Versatile boards is -highly customizable. - -Required properties (in root node): - compatible = "arm,versatile-ab"; /* Application baseboard */ - compatible = "arm,versatile-pb"; /* Platform baseboard */ - -Interrupt controllers: -- VIC required properties: - compatible = "arm,versatile-vic"; - interrupt-controller; - #interrupt-cells = <1>; - -- SIC required properties: - compatible = "arm,versatile-sic"; - interrupt-controller; - #interrupt-cells = <1>; - -Required nodes: - -- core-module: the root node to the Versatile platforms must have - a core-module with regs and the compatible strings - "arm,core-module-versatile", "syscon" - -Optional nodes: - -- arm,versatile-ib2-syscon : if the Versatile has an IB2 interface - board mounted, this has a separate system controller that is - defined in this node. - Required properties: - compatible = "arm,versatile-ib2-syscon", "syscon" - -ARM RealView Boards -------------------- -The RealView boards cover tailored evaluation boards that are used to explore -the ARM11 and Cortex A-8 and Cortex A-9 processors. - -Required properties (in root node): - /* RealView Emulation Baseboard */ - compatible = "arm,realview-eb"; - /* RealView Platform Baseboard for ARM1176JZF-S */ - compatible = "arm,realview-pb1176"; - /* RealView Platform Baseboard for ARM11 MPCore */ - compatible = "arm,realview-pb11mp"; - /* RealView Platform Baseboard for Cortex A-8 */ - compatible = "arm,realview-pba8"; - /* RealView Platform Baseboard Explore for Cortex A-9 */ - compatible = "arm,realview-pbx"; - -Required nodes: - -- soc: some node of the RealView platforms must be the SoC - node that contain the SoC-specific devices, with the compatible - string set to one of these tuples: - "arm,realview-eb-soc", "simple-bus" - "arm,realview-pb1176-soc", "simple-bus" - "arm,realview-pb11mp-soc", "simple-bus" - "arm,realview-pba8-soc", "simple-bus" - "arm,realview-pbx-soc", "simple-bus" - -- syscon: some subnode of the RealView SoC node must be a - system controller node pointing to the control registers, - with the compatible string set to one of these: - "arm,realview-eb11mp-revb-syscon", "arm,realview-eb-syscon", "syscon" - "arm,realview-eb11mp-revc-syscon", "arm,realview-eb-syscon", "syscon" - "arm,realview-eb-syscon", "syscon" - "arm,realview-pb1176-syscon", "syscon" - "arm,realview-pb11mp-syscon", "syscon" - "arm,realview-pba8-syscon", "syscon" - "arm,realview-pbx-syscon", "syscon" - - Required properties for the system controller: - - regs: the location and size of the system controller registers, - one range of 0x1000 bytes. - -Example: - -/dts-v1/; -#include - -/ { - model = "ARM RealView PB1176 with device tree"; - compatible = "arm,realview-pb1176"; - #address-cells = <1>; - #size-cells = <1>; - - soc { - #address-cells = <1>; - #size-cells = <1>; - compatible = "arm,realview-pb1176-soc", "simple-bus"; - ranges; - - syscon: syscon@10000000 { - compatible = "arm,realview-syscon", "syscon"; - reg = <0x10000000 0x1000>; - }; - - }; -}; - -ARM Versatile Express Boards ------------------------------ -For details on the device tree bindings for ARM Versatile Express boards -please consult the vexpress.txt file in the same directory as this file. - -ARM Juno Boards ----------------- -The Juno boards are targeting development for AArch64 systems. The first -iteration, Juno r0, is a vehicle for evaluating big.LITTLE on AArch64, -with the second iteration, Juno r1, mainly aimed at development of PCIe -based systems. Juno r1 also has support for AXI masters placed on the TLX -connectors to join the coherency domain. - -Juno boards are described in a similar way to ARM Versatile Express boards, -with the motherboard part of the hardware being described in a separate file -to highlight the fact that is part of the support infrastructure for the SoC. -Juno device tree bindings also share the Versatile Express bindings as -described under the RS1 memory mapping. - -Required properties (in root node): - compatible = "arm,juno"; /* For Juno r0 board */ - compatible = "arm,juno-r1"; /* For Juno r1 board */ - compatible = "arm,juno-r2"; /* For Juno r2 board */ - -Required nodes: -The description for the board must include: - - a "psci" node describing the boot method used for the secondary CPUs. - A detailed description of the bindings used for "psci" nodes is present - in the psci.yaml file. - - a "cpus" node describing the available cores and their associated - "enable-method"s. For more details see cpus.yaml file. - -Example: - -/dts-v1/; -/ { - model = "ARM Juno development board (r0)"; - compatible = "arm,juno", "arm,vexpress"; - interrupt-parent = <&gic>; - #address-cells = <2>; - #size-cells = <2>; - - cpus { - #address-cells = <2>; - #size-cells = <0>; - - A57_0: cpu@0 { - compatible = "arm,cortex-a57"; - reg = <0x0 0x0>; - device_type = "cpu"; - enable-method = "psci"; - }; - - ..... - - A53_0: cpu@100 { - compatible = "arm,cortex-a53"; - reg = <0x0 0x100>; - device_type = "cpu"; - enable-method = "psci"; - }; - - ..... - }; - -}; diff --git a/Documentation/devicetree/bindings/arm/vexpress.txt b/Documentation/devicetree/bindings/arm/vexpress.txt deleted file mode 100644 index 39844cd0bcce..000000000000 --- a/Documentation/devicetree/bindings/arm/vexpress.txt +++ /dev/null @@ -1,229 +0,0 @@ -ARM Versatile Express boards family ------------------------------------ - -ARM's Versatile Express platform consists of a motherboard and one -or more daughterboards (tiles). The motherboard provides a set of -peripherals. Processor and RAM "live" on the tiles. - -The motherboard and each core tile should be described by a separate -Device Tree source file, with the tile's description including -the motherboard file using a /include/ directive. As the motherboard -can be initialized in one of two different configurations ("memory -maps"), care must be taken to include the correct one. - - -Root node ---------- - -Required properties in the root node: -- compatible value: - compatible = "arm,vexpress,", "arm,vexpress"; - where is the full tile model name (as used in the tile's - Technical Reference Manual), eg.: - - for Coretile Express A5x2 (V2P-CA5s): - compatible = "arm,vexpress,v2p-ca5s", "arm,vexpress"; - - for Coretile Express A9x4 (V2P-CA9): - compatible = "arm,vexpress,v2p-ca9", "arm,vexpress"; - If a tile comes in several variants or can be used in more then one - configuration, the compatible value should be: - compatible = "arm,vexpress,,", \ - "arm,vexpress,", "arm,vexpress"; - eg: - - Coretile Express A15x2 (V2P-CA15) with Tech Chip 1: - compatible = "arm,vexpress,v2p-ca15,tc1", \ - "arm,vexpress,v2p-ca15", "arm,vexpress"; - - LogicTile Express 13MG (V2F-2XV6) running Cortex-A7 (3 cores) SMM: - compatible = "arm,vexpress,v2f-2xv6,ca7x3", \ - "arm,vexpress,v2f-2xv6", "arm,vexpress"; - -Optional properties in the root node: -- tile model name (use name from the tile's Technical Reference - Manual, eg. "V2P-CA5s") - model = ""; -- tile's HBI number (unique ARM's board model ID, visible on the - PCB's silkscreen) in hexadecimal transcription: - arm,hbi = <0xhbi> - eg: - - for Coretile Express A5x2 (V2P-CA5s) HBI-0191: - arm,hbi = <0x191>; - - Coretile Express A9x4 (V2P-CA9) HBI-0225: - arm,hbi = <0x225>; - - -CPU nodes ---------- - -Top-level standard "cpus" node is required. It must contain a node -with device_type = "cpu" property for every available core, eg.: - - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - device_type = "cpu"; - compatible = "arm,cortex-a5"; - reg = <0>; - }; - }; - - -Configuration infrastructure ----------------------------- - -The platform has an elaborated configuration system, consisting of -microcontrollers residing on the mother- and daughterboards known -as Motherboard/Daughterboard Configuration Controller (MCC and DCC). -The controllers are responsible for the platform initialization -(reset generation, flash programming, FPGA bitfiles loading etc.) -but also control clock generators, voltage regulators, gather -environmental data like temperature, power consumption etc. Even -the video output switch (FPGA) is controlled that way. - -The controllers are not mapped into normal memory address space -and must be accessed through bridges - other devices capable -of generating transactions on the configuration bus. - -The nodes describing configuration controllers must define -the following properties: -- compatible value: - compatible = "arm,vexpress,config-bus"; -- bridge phandle: - arm,vexpress,config-bridge = ; -and children describing available functions. - - -Platform topology ------------------ - -As Versatile Express can be configured in number of physically -different setups, the device tree should describe platform topology. -Root node and main motherboard node must define the following -property, describing physical location of the children nodes: -- site number: - arm,vexpress,site = ; - where 0 means motherboard, 1 or 2 are daugtherboard sites, - 0xf means "master" site (site containing main CPU tile) -- when daughterboards are stacked on one site, their position - in the stack be be described with: - arm,vexpress,position = ; -- when describing tiles consisting more than one DCC, its number - can be described with: - arm,vexpress,dcc = ; - -Any of the numbers above defaults to zero if not defined in -the node or any of its parent. - - -Motherboard ------------ - -The motherboard description file provides a single "motherboard" node -using 2 address cells corresponding to the Static Memory Bus used -between the motherboard and the tile. The first cell defines the Chip -Select (CS) line number, the second cell address offset within the CS. -All interrupt lines between the motherboard and the tile are active -high and are described using single cell. - -Optional properties of the "motherboard" node: -- motherboard's memory map variant: - arm,v2m-memory-map = ""; - where name is one of: - - "rs1" - for RS1 map (i.a. peripherals on CS3); this map is also - referred to as "ARM Cortex-A Series memory map": - arm,v2m-memory-map = "rs1"; - When this property is missing, the motherboard is using the original - memory map (also known as the "Legacy memory map", primarily used - with the original CoreTile Express A9x4) with peripherals on CS7. - -Motherboard .dtsi files provide a set of labelled peripherals that -can be used to obtain required phandle in the tile's "aliases" node: -- UARTs, note that the numbers correspond to the physical connectors - on the motherboard's back panel: - v2m_serial0, v2m_serial1, v2m_serial2 and v2m_serial3 -- I2C controllers: - v2m_i2c_dvi and v2m_i2c_pcie -- SP804 timers: - v2m_timer01 and v2m_timer23 - -The tile description should define a "smb" node, describing the -Static Memory Bus between the tile and motherboard. It must define -the following properties: -- "simple-bus" compatible value (to ensure creation of the children) - compatible = "simple-bus"; -- mapping of the SMB CS/offset addresses into main address space: - #address-cells = <2>; - #size-cells = <1>; - ranges = <...>; -- interrupts mapping: - #interrupt-cells = <1>; - interrupt-map-mask = <0 0 63>; - interrupt-map = <...>; - - -Example of a VE tile description (simplified) ---------------------------------------------- - -/dts-v1/; - -/ { - model = "V2P-CA5s"; - arm,hbi = <0x225>; - arm,vexpress,site = <0xf>; - compatible = "arm,vexpress-v2p-ca5s", "arm,vexpress"; - interrupt-parent = <&gic>; - #address-cells = <1>; - #size-cells = <1>; - - chosen { }; - - aliases { - serial0 = &v2m_serial0; - }; - - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - device_type = "cpu"; - compatible = "arm,cortex-a5"; - reg = <0>; - }; - }; - - gic: interrupt-controller@2c001000 { - compatible = "arm,cortex-a9-gic"; - #interrupt-cells = <3>; - #address-cells = <0>; - interrupt-controller; - reg = <0x2c001000 0x1000>, - <0x2c000100 0x100>; - }; - - dcc { - compatible = "arm,vexpress,config-bus"; - arm,vexpress,config-bridge = <&v2m_sysreg>; - - osc@0 { - compatible = "arm,vexpress-osc"; - }; - }; - - smb { - compatible = "simple-bus"; - - #address-cells = <2>; - #size-cells = <1>; - /* CS0 is visible at 0x08000000 */ - ranges = <0 0 0x08000000 0x04000000>; - - #interrupt-cells = <1>; - interrupt-map-mask = <0 0 63>; - /* Active high IRQ 0 is connected to GIC's SPI0 */ - interrupt-map = <0 0 0 &gic 0 0 4>; - - /include/ "vexpress-v2m-rs1.dtsi" - }; -}; - -- cgit v1.2.3 From 4d0dd3802ee1b6b14b7c46621cd581eb7c1ade79 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 27 Feb 2020 16:28:33 -0600 Subject: dt-bindings: bus: ti-sysc: Add support for PRUSS SYSC type The PRUSS module has a SYSCFG which is unique. The SYSCFG has two additional unique fields called STANDBY_INIT and SUB_MWAIT in addition to regular IDLE_MODE and STANDBY_MODE fields. Add the bindings for this new sysc type. Cc: Rob Herring Signed-off-by: Roger Quadros Signed-off-by: Suman Anna Acked-by: Rob Herring Signed-off-by: Tony Lindgren --- Documentation/devicetree/bindings/bus/ti-sysc.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/bus/ti-sysc.txt b/Documentation/devicetree/bindings/bus/ti-sysc.txt index 233eb8294204..c984143d08d2 100644 --- a/Documentation/devicetree/bindings/bus/ti-sysc.txt +++ b/Documentation/devicetree/bindings/bus/ti-sysc.txt @@ -38,6 +38,7 @@ Required standard properties: "ti,sysc-dra7-mcasp" "ti,sysc-usb-host-fs" "ti,sysc-dra7-mcan" + "ti,sysc-pruss" - reg shall have register areas implemented for the interconnect target module in question such as revision, sysc and syss -- cgit v1.2.3 From 8375e74f2bca9802a4ddf431a6d7bd2ab9950f27 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 21 Feb 2020 17:40:35 -0800 Subject: driver core: Add fw_devlink kernel commandline option fwnode_operations.add_links allows creating device links from information provided by firmware. fwnode_operations.add_links is currently implemented only by OF/devicetree code and a specific case of efi. However, there's nothing preventing ACPI or other firmware types from implementing it. The OF implementation is currently controlled by a kernel commandline parameter called of_devlink. Since this feature is generic isn't limited to OF, add a generic fw_devlink kernel commandline parameter to control this feature across firmware types. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20200222014038.180923-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index dbc22d684627..29985152b66d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1350,6 +1350,24 @@ can be changed at run time by the max_graph_depth file in the tracefs tracing directory. default: 0 (no limit) + fw_devlink= [KNL] Create device links between consumer and supplier + devices by scanning the firmware to infer the + consumer/supplier relationships. This feature is + especially useful when drivers are loaded as modules as + it ensures proper ordering of tasks like device probing + (suppliers first, then consumers), supplier boot state + clean up (only after all consumers have probed), + suspend/resume & runtime PM (consumers first, then + suppliers). + Format: { off | permissive | on | rpm } + off -- Don't create device links from firmware info. + permissive -- Create device links from firmware info + but use it only for ordering boot state clean + up (sync_state() calls). + on -- Create device links from firmware info and use it + to enforce probe and suspend/resume ordering. + rpm -- Like "on", but also use to order runtime PM. + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) -- cgit v1.2.3 From e94f62b7140fa3da4c69a685b2e73ef52dd32c51 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Fri, 21 Feb 2020 17:40:38 -0800 Subject: of: property: Delete of_devlink kernel commandline option With the addition of fw_devlink kernel commandline option, of_devlink is redundant and not useful anymore. So, delete it. Acked-by: Rob Herring Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20200222014038.180923-6-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 6 ------ 1 file changed, 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 29985152b66d..6692b2aa6140 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3299,12 +3299,6 @@ This can be set from sysctl after boot. See Documentation/admin-guide/sysctl/vm.rst for details. - of_devlink [OF, KNL] Create device links between consumer and - supplier devices by scanning the devictree to infer the - consumer/supplier relationships. A consumer device - will not be probed until all the supplier devices have - probed successfully. - ohci1394_dma=early [HW] enable debugging via the ohci1394 driver. See Documentation/debugging-via-ohci1394.txt for more info. -- cgit v1.2.3 From 50b62071deab48c1a69c471f9a7d0c8ff9ef23eb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 2 Mar 2020 02:19:53 +0200 Subject: spi: spi-fsl-dspi: Add specific compatibles for all Layerscape SoCs Make the second compatible string optional for LS1012A, LS1088A and LS2080A. Old versions of the spi-fsl-dspi.c driver still need to probe on the old, generic compatible string for these controllers (such as "fsl,ls1021a-v1.0-dspi") which provides less functionality. Document the device tree bindings for LS1043A and LS1046A, whose bindings are already in use in fsl-ls1043a.dtsi and fsl-ls1046a.dtsi. Introduce new compatible strings for LS1028A and LX2160A. There will be no second compatible string for these. Signed-off-by: Vladimir Oltean Message-Id: <20200302001958.11105-2-olteanv@gmail.com> Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt b/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt index 162e024b95a0..99b94cfe1623 100644 --- a/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt +++ b/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt @@ -1,12 +1,17 @@ ARM Freescale DSPI controller Required properties: -- compatible : "fsl,vf610-dspi", "fsl,ls1021a-v1.0-dspi", - "fsl,ls2085a-dspi" - or - "fsl,ls2080a-dspi" followed by "fsl,ls2085a-dspi" - "fsl,ls1012a-dspi" followed by "fsl,ls1021a-v1.0-dspi" - "fsl,ls1088a-dspi" followed by "fsl,ls1021a-v1.0-dspi" +- compatible : must be one of: + "fsl,vf610-dspi", + "fsl,ls1021a-v1.0-dspi", + "fsl,ls1012a-dspi" (optionally followed by "fsl,ls1021a-v1.0-dspi"), + "fsl,ls1028a-dspi", + "fsl,ls1043a-dspi" (optionally followed by "fsl,ls1021a-v1.0-dspi"), + "fsl,ls1046a-dspi" (optionally followed by "fsl,ls1021a-v1.0-dspi"), + "fsl,ls1088a-dspi" (optionally followed by "fsl,ls1021a-v1.0-dspi"), + "fsl,ls2080a-dspi" (optionally followed by "fsl,ls2085a-dspi"), + "fsl,ls2085a-dspi", + "fsl,lx2160a-dspi", - reg : Offset and length of the register set for the device - interrupts : Should contain SPI controller interrupt - clocks: from common clock binding: handle to dspi clock. -- cgit v1.2.3 From c5f3f6ab5bc13da4432a11c6013e6b911a53c925 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 27 Feb 2020 21:36:48 +0900 Subject: dt-bindings: arm: Convert UniPhier System Cache to json-schema Convert the UniPhier System Cache binding to DT schema format. This is a full-custom outer cache (L2 and L3) used on UniPhier ARM 32-bit SoCs. While I was here, I added the interrupts property. This is not used in Linux, but the hardware has interrupt lines at least. Signed-off-by: Masahiro Yamada Signed-off-by: Rob Herring --- .../bindings/arm/socionext/cache-uniphier.txt | 60 ------------ .../socionext/socionext,uniphier-system-cache.yaml | 102 +++++++++++++++++++++ 2 files changed, 102 insertions(+), 60 deletions(-) delete mode 100644 Documentation/devicetree/bindings/arm/socionext/cache-uniphier.txt create mode 100644 Documentation/devicetree/bindings/arm/socionext/socionext,uniphier-system-cache.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/socionext/cache-uniphier.txt b/Documentation/devicetree/bindings/arm/socionext/cache-uniphier.txt deleted file mode 100644 index d27a646f48a9..000000000000 --- a/Documentation/devicetree/bindings/arm/socionext/cache-uniphier.txt +++ /dev/null @@ -1,60 +0,0 @@ -UniPhier outer cache controller - -UniPhier SoCs are integrated with a full-custom outer cache controller system. -All of them have a level 2 cache controller, and some have a level 3 cache -controller as well. - -Required properties: -- compatible: should be "socionext,uniphier-system-cache" -- reg: offsets and lengths of the register sets for the device. It should - contain 3 regions: control register, revision register, operation register, - in this order. -- cache-unified: specifies the cache is a unified cache. -- cache-size: specifies the size in bytes of the cache -- cache-sets: specifies the number of associativity sets of the cache -- cache-line-size: specifies the line size in bytes -- cache-level: specifies the level in the cache hierarchy. The value should - be 2 for L2 cache, 3 for L3 cache, etc. - -Optional properties: -- next-level-cache: phandle to the next level cache if present. The next level - cache should be also compatible with "socionext,uniphier-system-cache". - -The L2 cache must exist to use the L3 cache; the cache hierarchy must be -indicated correctly with "next-level-cache" properties. - -Example 1 (system with L2): - l2: l2-cache@500c0000 { - compatible = "socionext,uniphier-system-cache"; - reg = <0x500c0000 0x2000>, <0x503c0100 0x4>, - <0x506c0000 0x400>; - cache-unified; - cache-size = <0x80000>; - cache-sets = <256>; - cache-line-size = <128>; - cache-level = <2>; - }; - -Example 2 (system with L2 and L3): - l2: l2-cache@500c0000 { - compatible = "socionext,uniphier-system-cache"; - reg = <0x500c0000 0x2000>, <0x503c0100 0x8>, - <0x506c0000 0x400>; - cache-unified; - cache-size = <0x200000>; - cache-sets = <512>; - cache-line-size = <128>; - cache-level = <2>; - next-level-cache = <&l3>; - }; - - l3: l3-cache@500c8000 { - compatible = "socionext,uniphier-system-cache"; - reg = <0x500c8000 0x2000>, <0x503c8100 0x8>, - <0x506c8000 0x400>; - cache-unified; - cache-size = <0x400000>; - cache-sets = <512>; - cache-line-size = <256>; - cache-level = <3>; - }; diff --git a/Documentation/devicetree/bindings/arm/socionext/socionext,uniphier-system-cache.yaml b/Documentation/devicetree/bindings/arm/socionext/socionext,uniphier-system-cache.yaml new file mode 100644 index 000000000000..2e765bb3e6f6 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/socionext/socionext,uniphier-system-cache.yaml @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/arm/socionext/socionext,uniphier-system-cache.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: UniPhier outer cache controller + +description: | + UniPhier ARM 32-bit SoCs are integrated with a full-custom outer cache + controller system. All of them have a level 2 cache controller, and some + have a level 3 cache controller as well. + +maintainers: + - Masahiro Yamada + +properties: + compatible: + const: socionext,uniphier-system-cache + + reg: + description: | + should contain 3 regions: control register, revision register, + operation register, in this order. + minItems: 3 + maxItems: 3 + + interrupts: + description: | + Interrupts can be used to notify the completion of cache operations. + The number of interrupts should match to the number of CPU cores. + The specified interrupts correspond to CPU0, CPU1, ... in this order. + minItems: 1 + maxItems: 4 + + cache-unified: true + + cache-size: true + + cache-sets: true + + cache-line-size: true + + cache-level: + minimum: 2 + maximum: 3 + + next-level-cache: true + +allOf: + - $ref: /schemas/cache-controller.yaml# + +additionalProperties: false + +required: + - compatible + - reg + - interrupts + - cache-unified + - cache-size + - cache-sets + - cache-line-size + - cache-level + +examples: + - | + // System with L2. + cache-controller@500c0000 { + compatible = "socionext,uniphier-system-cache"; + reg = <0x500c0000 0x2000>, <0x503c0100 0x4>, <0x506c0000 0x400>; + interrupts = <0 174 4>, <0 175 4>, <0 190 4>, <0 191 4>; + cache-unified; + cache-size = <0x140000>; + cache-sets = <512>; + cache-line-size = <128>; + cache-level = <2>; + }; + - | + // System with L2 and L3. + // L2 should specify the next level cache by 'next-level-cache'. + l2: cache-controller@500c0000 { + compatible = "socionext,uniphier-system-cache"; + reg = <0x500c0000 0x2000>, <0x503c0100 0x8>, <0x506c0000 0x400>; + interrupts = <0 190 4>, <0 191 4>; + cache-unified; + cache-size = <0x200000>; + cache-sets = <512>; + cache-line-size = <128>; + cache-level = <2>; + next-level-cache = <&l3>; + }; + + l3: cache-controller@500c8000 { + compatible = "socionext,uniphier-system-cache"; + reg = <0x500c8000 0x2000>, <0x503c8100 0x8>, <0x506c8000 0x400>; + interrupts = <0 174 4>, <0 175 4>; + cache-unified; + cache-size = <0x200000>; + cache-sets = <512>; + cache-line-size = <256>; + cache-level = <3>; + }; -- cgit v1.2.3 From f206c0eeb476f6eaa966f6e21a8831bb766f8659 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Sat, 29 Feb 2020 16:15:04 +0100 Subject: dt-bindings: Add vendor prefix for Elida Shenzen Elida Technology Co. Ltd. is a Chinese TFT manufacturer. Signed-off-by: Heiko Stuebner Acked-by: Sam Ravnborg Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index fcdda587def5..d035e38d6ba3 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -285,6 +285,8 @@ patternProperties: description: Elan Microelectronic Corp. "^elgin,.*": description: Elgin S/A. + "^elida,.*": + description: Shenzhen Elida Technology Co., Ltd. "^embest,.*": description: Shenzhen Embest Technology Co., Ltd. "^emlid,.*": -- cgit v1.2.3 From f15652859cbe34ec4f3667169facd86309f438d2 Mon Sep 17 00:00:00 2001 From: Sam Shih Date: Wed, 4 Mar 2020 19:06:12 +0800 Subject: dt-bindings: pwm: Update bindings for MT7629 SoC This updates bindings for MT7629 pwm controller. Signed-off-by: Sam Shih Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/pwm/pwm-mediatek.txt | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt index 95536d83c5f2..29adff59c479 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-mediatek.txt @@ -19,10 +19,15 @@ Required properties: - "pwm1-8": the eight per PWM clocks for mt2712 - "pwm1-6": the six per PWM clocks for mt7622 - "pwm1-5": the five per PWM clocks for mt7623 + - "pwm1" : the PWM1 clock for mt7629 - pinctrl-names: Must contain a "default" entry. - pinctrl-0: One property must exist for each entry in pinctrl-names. See pinctrl/pinctrl-bindings.txt for details of the property values. +Optional properties: +- assigned-clocks: Reference to the PWM clock entries. +- assigned-clock-parents: The phandle of the parent clock of PWM clock. + Example: pwm0: pwm@11006000 { compatible = "mediatek,mt7623-pwm"; -- cgit v1.2.3 From 922003733d42028e5b38f3cd1cc0811d7ffa4978 Mon Sep 17 00:00:00 2001 From: Yuti Amonkar Date: Thu, 6 Feb 2020 07:10:49 +0100 Subject: dt-bindings: phy: Remove Cadence MHDP PHY dt binding Remove the Cadence MHDP PHY bindings. The binding is added in next commit in YAML format. It is renamed to adopt torrent nomenclature. This will not affect ABI as the driver has never been functional, and therefore do not exist in any active use case. Signed-off-by: Yuti Amonkar Signed-off-by: Kishon Vijay Abraham I --- .../devicetree/bindings/phy/phy-cadence-dp.txt | 30 ---------------------- 1 file changed, 30 deletions(-) delete mode 100644 Documentation/devicetree/bindings/phy/phy-cadence-dp.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-dp.txt b/Documentation/devicetree/bindings/phy/phy-cadence-dp.txt deleted file mode 100644 index 7f49fd54ebc1..000000000000 --- a/Documentation/devicetree/bindings/phy/phy-cadence-dp.txt +++ /dev/null @@ -1,30 +0,0 @@ -Cadence MHDP DisplayPort SD0801 PHY binding -=========================================== - -This binding describes the Cadence SD0801 PHY hardware included with -the Cadence MHDP DisplayPort controller. - -------------------------------------------------------------------------------- -Required properties (controller (parent) node): -- compatible : Should be "cdns,dp-phy" -- reg : Defines the following sets of registers in the parent - mhdp device: - - Offset of the DPTX PHY configuration registers - - Offset of the SD0801 PHY configuration registers -- #phy-cells : from the generic PHY bindings, must be 0. - -Optional properties: -- num_lanes : Number of DisplayPort lanes to use (1, 2 or 4) -- max_bit_rate : Maximum DisplayPort link bit rate to use, in Mbps (2160, - 2430, 2700, 3240, 4320, 5400 or 8100) -------------------------------------------------------------------------------- - -Example: - dp_phy: phy@f0fb030a00 { - compatible = "cdns,dp-phy"; - reg = <0xf0 0xfb030a00 0x0 0x00000040>, - <0xf0 0xfb500000 0x0 0x00100000>; - num_lanes = <4>; - max_bit_rate = <8100>; - #phy-cells = <0>; - }; -- cgit v1.2.3 From c7a1a20e36faed2a298dd2965ab91c31dfc43f73 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 26 Jan 2020 08:09:11 -0600 Subject: spi: spi-nxp-fspi: Add support for imx8mm, imx8qxp Add support for nxp,imx8qxp-fspi and nxp,imx8mm-fspi do the bindings document. Signed-off-by: Adam Ford Link: https://lore.kernel.org/r/20200126140913.2139260-4-aford173@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-nxp-fspi.txt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.txt b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.txt index 2cd67eb727d4..7ac60d9fe357 100644 --- a/Documentation/devicetree/bindings/spi/spi-nxp-fspi.txt +++ b/Documentation/devicetree/bindings/spi/spi-nxp-fspi.txt @@ -2,6 +2,9 @@ Required properties: - compatible : Should be "nxp,lx2160a-fspi" + "nxp,imx8qxp-fspi" + "nxp,imx8mm-fspi" + - reg : First contains the register location and length, Second contains the memory mapping address and length - reg-names : Should contain the resource reg names: -- cgit v1.2.3 From 29d2daf2c33ce9b1038f3fcd55e16a788c00fc86 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Thu, 5 Mar 2020 12:55:46 +0100 Subject: spi: spi-fsl-dspi: Make bus-num property optional The SPI bus number is completely optional to Linux, so make the corresponding device tree property optional as well. Signed-off-by: Sascha Hauer Link: https://lore.kernel.org/r/20200305115546.31814-1-s.hauer@pengutronix.de Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt b/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt index 99b94cfe1623..30a79da9c039 100644 --- a/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt +++ b/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt @@ -19,11 +19,11 @@ Required properties: - pinctrl-0: pin control group to be used for this controller. - pinctrl-names: must contain a "default" entry. - spi-num-chipselects : the number of the chipselect signals. -- bus-num : the slave chip chipselect signal number. Optional property: - big-endian: If present the dspi device's registers are implemented in big endian mode. +- bus-num : the slave chip chipselect signal number. Optional SPI slave node properties: - fsl,spi-cs-sck-delay: a delay in nanoseconds between activating chip -- cgit v1.2.3 From 5de04175fa2720bacf34196275123be5f32e2b82 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 4 Mar 2020 19:42:01 +0100 Subject: dt-bindings: spi: convert rockchip spi bindings to yaml Current dts files with 'spi' nodes are manually verified. In order to automate this process spi-rockchip.txt has to be converted to yaml. In the new setup spi-rockchip.yaml will inherit properties from spi-controller.yaml. Add document to MAINTAINERS. Also rk3188.dtsi, rk3288.dtsi, rk3368.dtsi and rk3399.dtsi use an extra fallback string, so change this in the documentation. Changed: "rockchip,rk3188-spi", "rockchip,rk3066-spi" "rockchip,rk3288-spi", "rockchip,rk3066-spi" "rockchip,rk3368-spi", "rockchip,rk3066-spi" "rockchip,rk3399-spi", "rockchip,rk3066-spi" Signed-off-by: Johan Jonker Link: https://lore.kernel.org/r/20200304184203.9548-1-jbx6244@gmail.com Signed-off-by: Mark Brown --- .../devicetree/bindings/spi/spi-rockchip.txt | 58 ------------ .../devicetree/bindings/spi/spi-rockchip.yaml | 105 +++++++++++++++++++++ 2 files changed, 105 insertions(+), 58 deletions(-) delete mode 100644 Documentation/devicetree/bindings/spi/spi-rockchip.txt create mode 100644 Documentation/devicetree/bindings/spi/spi-rockchip.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.txt b/Documentation/devicetree/bindings/spi/spi-rockchip.txt deleted file mode 100644 index a0edac12d8df..000000000000 --- a/Documentation/devicetree/bindings/spi/spi-rockchip.txt +++ /dev/null @@ -1,58 +0,0 @@ -* Rockchip SPI Controller - -The Rockchip SPI controller is used to interface with various devices such as flash -and display controllers using the SPI communication interface. - -Required Properties: - -- compatible: should be one of the following. - "rockchip,rv1108-spi" for rv1108 SoCs. - "rockchip,px30-spi", "rockchip,rk3066-spi" for px30 SoCs. - "rockchip,rk3036-spi" for rk3036 SoCS. - "rockchip,rk3066-spi" for rk3066 SoCs. - "rockchip,rk3188-spi" for rk3188 SoCs. - "rockchip,rk3228-spi" for rk3228 SoCS. - "rockchip,rk3288-spi" for rk3288 SoCs. - "rockchip,rk3368-spi" for rk3368 SoCs. - "rockchip,rk3399-spi" for rk3399 SoCs. -- reg: physical base address of the controller and length of memory mapped - region. -- interrupts: The interrupt number to the cpu. The interrupt specifier format - depends on the interrupt controller. -- clocks: Must contain an entry for each entry in clock-names. -- clock-names: Shall be "spiclk" for the transfer-clock, and "apb_pclk" for - the peripheral clock. -- #address-cells: should be 1. -- #size-cells: should be 0. - -Optional Properties: - -- dmas: DMA specifiers for tx and rx dma. See the DMA client binding, - Documentation/devicetree/bindings/dma/dma.txt -- dma-names: DMA request names should include "tx" and "rx" if present. -- rx-sample-delay-ns: nanoseconds to delay after the SCLK edge before sampling - Rx data (may need to be fine tuned for high capacitance lines). - No delay (0) by default. -- pinctrl-names: Names for the pin configuration(s); may be "default" or - "sleep", where the "sleep" configuration may describe the state - the pins should be in during system suspend. See also - pinctrl/pinctrl-bindings.txt. - - -Example: - - spi0: spi@ff110000 { - compatible = "rockchip,rk3066-spi"; - reg = <0xff110000 0x1000>; - dmas = <&pdma1 11>, <&pdma1 12>; - dma-names = "tx", "rx"; - rx-sample-delay-ns = <10>; - #address-cells = <1>; - #size-cells = <0>; - interrupts = ; - clocks = <&cru SCLK_SPI0>, <&cru PCLK_SPI0>; - clock-names = "spiclk", "apb_pclk"; - pinctrl-0 = <&spi1_pins>; - pinctrl-1 = <&spi1_sleep>; - pinctrl-names = "default", "sleep"; - }; diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml new file mode 100644 index 000000000000..bd1450c1274c --- /dev/null +++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/spi/spi-rockchip.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Rockchip SPI Controller + +description: + The Rockchip SPI controller is used to interface with various devices such + as flash and display controllers using the SPI communication interface. + +allOf: + - $ref: "spi-controller.yaml#" + +maintainers: + - Heiko Stuebner + +# Everything else is described in the common file +properties: + compatible: + oneOf: + - const: rockchip,rk3036-spi + - const: rockchip,rk3066-spi + - const: rockchip,rk3228-spi + - const: rockchip,rv1108-spi + - items: + - enum: + - rockchip,px30-spi + - rockchip,rk3188-spi + - rockchip,rk3288-spi + - rockchip,rk3368-spi + - rockchip,rk3399-spi + - const: rockchip,rk3066-spi + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + items: + - description: transfer-clock + - description: peripheral clock + + clock-names: + items: + - const: spiclk + - const: apb_pclk + + dmas: + items: + - description: TX DMA Channel + - description: RX DMA Channel + + dma-names: + items: + - const: tx + - const: rx + + rx-sample-delay-ns: + default: 0 + description: + Nano seconds to delay after the SCLK edge before sampling Rx data + (may need to be fine tuned for high capacitance lines). + If not specified 0 will be used. + + pinctrl-names: + minItems: 1 + items: + - const: default + - const: sleep + description: + Names for the pin configuration(s); may be "default" or "sleep", + where the "sleep" configuration may describe the state + the pins should be in during system suspend. + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + +examples: + - | + #include + #include + #include + spi0: spi@ff110000 { + compatible = "rockchip,rk3066-spi"; + reg = <0xff110000 0x1000>; + interrupts = ; + clocks = <&cru SCLK_SPI0>, <&cru PCLK_SPI0>; + clock-names = "spiclk", "apb_pclk"; + dmas = <&pdma1 11>, <&pdma1 12>; + dma-names = "tx", "rx"; + pinctrl-0 = <&spi1_pins>; + pinctrl-1 = <&spi1_sleep>; + pinctrl-names = "default", "sleep"; + rx-sample-delay-ns = <10>; + #address-cells = <1>; + #size-cells = <0>; + }; -- cgit v1.2.3 From db7dd939acf369b3e9bfac2ae75307639785de9e Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 4 Mar 2020 19:42:02 +0100 Subject: dt-bindings: spi: spi-rockchip: add description for rk3308 The description below is already in use for rk3308.dtsi, but was somehow never added to a document, so add "rockchip,rk3308-spi", "rockchip,rk3066-spi" for spi nodes on a rk3308 platform to spi-rockchip.yaml. Signed-off-by: Johan Jonker Acked-by: Rob Herring Link: https://lore.kernel.org/r/20200304184203.9548-2-jbx6244@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-rockchip.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml index bd1450c1274c..456896e2d514 100644 --- a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml +++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml @@ -29,6 +29,7 @@ properties: - rockchip,px30-spi - rockchip,rk3188-spi - rockchip,rk3288-spi + - rockchip,rk3308-spi - rockchip,rk3368-spi - rockchip,rk3399-spi - const: rockchip,rk3066-spi -- cgit v1.2.3 From 6ac12131699b4ec1d8f339e40528bdd474542075 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Wed, 4 Mar 2020 19:42:03 +0100 Subject: dt-bindings: spi: spi-rockchip: add description for rk3328 The description below is already in use for rk3328.dtsi, but was somehow never added to a document, so add "rockchip,rk3328-spi", "rockchip,rk3066-spi" for spi nodes on a rk3328 platform to spi-rockchip.yaml. Signed-off-by: Johan Jonker Acked-by: Rob Herring Link: https://lore.kernel.org/r/20200304184203.9548-3-jbx6244@gmail.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-rockchip.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml index 456896e2d514..81ad4b761502 100644 --- a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml +++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml @@ -30,6 +30,7 @@ properties: - rockchip,rk3188-spi - rockchip,rk3288-spi - rockchip,rk3308-spi + - rockchip,rk3328-spi - rockchip,rk3368-spi - rockchip,rk3399-spi - const: rockchip,rk3066-spi -- cgit v1.2.3 From 06b741521622331eb01b67123e8cdda6ca8be187 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Wed, 4 Mar 2020 21:02:06 -0800 Subject: bpf, doc: Add BPF JIT for RV32G to BPF documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update filter.txt and admin-guide to mention the BPF JIT for RV32G. Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Daniel Borkmann Reviewed-by: Björn Töpel Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20200305050207.4159-4-luke.r.nels@gmail.com --- Documentation/admin-guide/sysctl/net.rst | 3 ++- Documentation/networking/filter.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 287b98708a40..e043c9213388 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -67,7 +67,8 @@ two flavors of JITs, the newer eBPF JIT currently supported on: - sparc64 - mips64 - s390x - - riscv + - riscv64 + - riscv32 And the older cBPF JIT supported on the following archs: diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index c4a328f2d57a..2f0f8b17dade 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -606,7 +606,7 @@ before a conversion to the new layout is being done behind the scenes! Currently, the classic BPF format is being used for JITing on most 32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64, -sparc64, arm32, riscv (RV64G) perform JIT compilation from eBPF +sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF instruction set. Some core changes of the new internal format: -- cgit v1.2.3 From 12bd112bf8e41ad0a5cbf6d71beacca45eb7d10c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 3 Mar 2020 14:52:03 +0100 Subject: docs: cpu-freq: convert index.txt to ReST most of the stuff there can be re-used with ReST format, but we need to add an empty TOC and remove the existing entries, as the following conversion patches will be re-adding them, as they're converted. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/index.rst | 35 +++++++++++++++++++++++++ Documentation/cpu-freq/index.txt | 56 ---------------------------------------- Documentation/index.rst | 1 + 3 files changed, 36 insertions(+), 56 deletions(-) create mode 100644 Documentation/cpu-freq/index.rst delete mode 100644 Documentation/cpu-freq/index.txt (limited to 'Documentation') diff --git a/Documentation/cpu-freq/index.rst b/Documentation/cpu-freq/index.rst new file mode 100644 index 000000000000..1bff3dfddd23 --- /dev/null +++ b/Documentation/cpu-freq/index.rst @@ -0,0 +1,35 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================================================== +Linux CPUFreq - CPU frequency and voltage scaling code in the Linux(TM) kernel +============================================================================== + +Author: Dominik Brodowski + + Clock scaling allows you to change the clock speed of the CPUs on the + fly. This is a nice method to save battery power, because the lower + the clock speed, the less power the CPU consumes. + + +.. toctree:: + :maxdepth: 1 + +Mailing List +------------ +There is a CPU frequency changing CVS commit and general list where +you can report bugs, problems or submit patches. To post a message, +send an email to linux-pm@vger.kernel.org. + +Links +----- +the FTP archives: +* ftp://ftp.linux.org.uk/pub/linux/cpufreq/ + +how to access the CVS repository: +* http://cvs.arm.linux.org.uk/ + +the CPUFreq Mailing list: +* http://vger.kernel.org/vger-lists.html#linux-pm + +Clock and voltage scaling for the SA-1100: +* http://www.lartmaker.nl/projects/scaling diff --git a/Documentation/cpu-freq/index.txt b/Documentation/cpu-freq/index.txt deleted file mode 100644 index c15e75386a05..000000000000 --- a/Documentation/cpu-freq/index.txt +++ /dev/null @@ -1,56 +0,0 @@ - CPU frequency and voltage scaling code in the Linux(TM) kernel - - - L i n u x C P U F r e q - - - - - Dominik Brodowski - - - - Clock scaling allows you to change the clock speed of the CPUs on the - fly. This is a nice method to save battery power, because the lower - the clock speed, the less power the CPU consumes. - - - -Documents in this directory: ----------------------------- - -amd-powernow.txt - AMD powernow driver specific file. - -core.txt - General description of the CPUFreq core and - of CPUFreq notifiers. - -cpu-drivers.txt - How to implement a new cpufreq processor driver. - -cpufreq-nforce2.txt - nVidia nForce2 platform specific file. - -cpufreq-stats.txt - General description of sysfs cpufreq stats. - -index.txt - File index, Mailing list and Links (this document) - -pcc-cpufreq.txt - PCC cpufreq driver specific file. - - -Mailing List ------------- -There is a CPU frequency changing CVS commit and general list where -you can report bugs, problems or submit patches. To post a message, -send an email to linux-pm@vger.kernel.org. - -Links ------ -the FTP archives: -* ftp://ftp.linux.org.uk/pub/linux/cpufreq/ - -how to access the CVS repository: -* http://cvs.arm.linux.org.uk/ - -the CPUFreq Mailing list: -* http://vger.kernel.org/vger-lists.html#linux-pm - -Clock and voltage scaling for the SA-1100: -* http://www.lartmaker.nl/projects/scaling diff --git a/Documentation/index.rst b/Documentation/index.rst index e99d0bd2589d..4cf37ad1cd1d 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -99,6 +99,7 @@ needed). accounting/index block/index cdrom/index + cpu-freq/index ide/index fb/index fpga/index -- cgit v1.2.3 From c460f972d3036dbff73897bf7e4b21bdff7d3eaa Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 3 Mar 2020 14:52:04 +0100 Subject: docs: cpu-freq: convert core.txt to ReST - Add a SPDX header; - Adjust the document title, based on the original contents of cpu-freq/index.txt; - Use lists where needed; - Comment out the existing text-only index; - use ``foo`` to mark a literal expression with asterisk; - Adjust some title marks; - Add table markups; - Add notes markups; - Some whitespace fixes and new line breaks; - Add it to cpu-freq/index.rst. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/core.rst | 113 +++++++++++++++++++++++++++++++++++++++ Documentation/cpu-freq/core.txt | 112 -------------------------------------- Documentation/cpu-freq/index.rst | 2 + 3 files changed, 115 insertions(+), 112 deletions(-) create mode 100644 Documentation/cpu-freq/core.rst delete mode 100644 Documentation/cpu-freq/core.txt (limited to 'Documentation') diff --git a/Documentation/cpu-freq/core.rst b/Documentation/cpu-freq/core.rst new file mode 100644 index 000000000000..33cb90bd1d8f --- /dev/null +++ b/Documentation/cpu-freq/core.rst @@ -0,0 +1,113 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================================================= +General description of the CPUFreq core and CPUFreq notifiers +============================================================= + +Authors: + - Dominik Brodowski + - David Kimdon + - Rafael J. Wysocki + - Viresh Kumar + +.. Contents: + + 1. CPUFreq core and interfaces + 2. CPUFreq notifiers + 3. CPUFreq Table Generation with Operating Performance Point (OPP) + +1. General Information +====================== + +The CPUFreq core code is located in drivers/cpufreq/cpufreq.c. This +cpufreq code offers a standardized interface for the CPUFreq +architecture drivers (those pieces of code that do actual +frequency transitions), as well as to "notifiers". These are device +drivers or other part of the kernel that need to be informed of +policy changes (ex. thermal modules like ACPI) or of all +frequency changes (ex. timing code) or even need to force certain +speed limits (like LCD drivers on ARM architecture). Additionally, the +kernel "constant" loops_per_jiffy is updated on frequency changes +here. + +Reference counting of the cpufreq policies is done by cpufreq_cpu_get +and cpufreq_cpu_put, which make sure that the cpufreq driver is +correctly registered with the core, and will not be unloaded until +cpufreq_put_cpu is called. That also ensures that the respective cpufreq +policy doesn't get freed while being used. + +2. CPUFreq notifiers +==================== + +CPUFreq notifiers conform to the standard kernel notifier interface. +See linux/include/linux/notifier.h for details on notifiers. + +There are two different CPUFreq notifiers - policy notifiers and +transition notifiers. + + +2.1 CPUFreq policy notifiers +---------------------------- + +These are notified when a new policy is created or removed. + +The phase is specified in the second argument to the notifier. The phase is +CPUFREQ_CREATE_POLICY when the policy is first created and it is +CPUFREQ_REMOVE_POLICY when the policy is removed. + +The third argument, a ``void *pointer``, points to a struct cpufreq_policy +consisting of several values, including min, max (the lower and upper +frequencies (in kHz) of the new policy). + + +2.2 CPUFreq transition notifiers +-------------------------------- + +These are notified twice for each online CPU in the policy, when the +CPUfreq driver switches the CPU core frequency and this change has no +any external implications. + +The second argument specifies the phase - CPUFREQ_PRECHANGE or +CPUFREQ_POSTCHANGE. + +The third argument is a struct cpufreq_freqs with the following +values: + +===== =========================== +cpu number of the affected CPU +old old frequency +new new frequency +flags flags of the cpufreq driver +===== =========================== + +3. CPUFreq Table Generation with Operating Performance Point (OPP) +================================================================== +For details about OPP, see Documentation/power/opp.rst + +dev_pm_opp_init_cpufreq_table - + This function provides a ready to use conversion routine to translate + the OPP layer's internal information about the available frequencies + into a format readily providable to cpufreq. + + .. Warning:: + + Do not use this function in interrupt context. + + Example:: + + soc_pm_init() + { + /* Do things */ + r = dev_pm_opp_init_cpufreq_table(dev, &freq_table); + if (!r) + policy->freq_table = freq_table; + /* Do other things */ + } + + .. note:: + + This function is available only if CONFIG_CPU_FREQ is enabled in + addition to CONFIG_PM_OPP. + +dev_pm_opp_free_cpufreq_table + Free up the table allocated by dev_pm_opp_init_cpufreq_table diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt deleted file mode 100644 index ed577d9c154b..000000000000 --- a/Documentation/cpu-freq/core.txt +++ /dev/null @@ -1,112 +0,0 @@ - CPU frequency and voltage scaling code in the Linux(TM) kernel - - - L i n u x C P U F r e q - - C P U F r e q C o r e - - - Dominik Brodowski - David Kimdon - Rafael J. Wysocki - Viresh Kumar - - - - Clock scaling allows you to change the clock speed of the CPUs on the - fly. This is a nice method to save battery power, because the lower - the clock speed, the less power the CPU consumes. - - -Contents: ---------- -1. CPUFreq core and interfaces -2. CPUFreq notifiers -3. CPUFreq Table Generation with Operating Performance Point (OPP) - -1. General Information -======================= - -The CPUFreq core code is located in drivers/cpufreq/cpufreq.c. This -cpufreq code offers a standardized interface for the CPUFreq -architecture drivers (those pieces of code that do actual -frequency transitions), as well as to "notifiers". These are device -drivers or other part of the kernel that need to be informed of -policy changes (ex. thermal modules like ACPI) or of all -frequency changes (ex. timing code) or even need to force certain -speed limits (like LCD drivers on ARM architecture). Additionally, the -kernel "constant" loops_per_jiffy is updated on frequency changes -here. - -Reference counting of the cpufreq policies is done by cpufreq_cpu_get -and cpufreq_cpu_put, which make sure that the cpufreq driver is -correctly registered with the core, and will not be unloaded until -cpufreq_put_cpu is called. That also ensures that the respective cpufreq -policy doesn't get freed while being used. - -2. CPUFreq notifiers -==================== - -CPUFreq notifiers conform to the standard kernel notifier interface. -See linux/include/linux/notifier.h for details on notifiers. - -There are two different CPUFreq notifiers - policy notifiers and -transition notifiers. - - -2.1 CPUFreq policy notifiers ----------------------------- - -These are notified when a new policy is created or removed. - -The phase is specified in the second argument to the notifier. The phase is -CPUFREQ_CREATE_POLICY when the policy is first created and it is -CPUFREQ_REMOVE_POLICY when the policy is removed. - -The third argument, a void *pointer, points to a struct cpufreq_policy -consisting of several values, including min, max (the lower and upper -frequencies (in kHz) of the new policy). - - -2.2 CPUFreq transition notifiers --------------------------------- - -These are notified twice for each online CPU in the policy, when the -CPUfreq driver switches the CPU core frequency and this change has no -any external implications. - -The second argument specifies the phase - CPUFREQ_PRECHANGE or -CPUFREQ_POSTCHANGE. - -The third argument is a struct cpufreq_freqs with the following -values: -cpu - number of the affected CPU -old - old frequency -new - new frequency -flags - flags of the cpufreq driver - -3. CPUFreq Table Generation with Operating Performance Point (OPP) -================================================================== -For details about OPP, see Documentation/power/opp.rst - -dev_pm_opp_init_cpufreq_table - - This function provides a ready to use conversion routine to translate - the OPP layer's internal information about the available frequencies - into a format readily providable to cpufreq. - - WARNING: Do not use this function in interrupt context. - - Example: - soc_pm_init() - { - /* Do things */ - r = dev_pm_opp_init_cpufreq_table(dev, &freq_table); - if (!r) - policy->freq_table = freq_table; - /* Do other things */ - } - - NOTE: This function is available only if CONFIG_CPU_FREQ is enabled in - addition to CONFIG_PM_OPP. - -dev_pm_opp_free_cpufreq_table - Free up the table allocated by dev_pm_opp_init_cpufreq_table diff --git a/Documentation/cpu-freq/index.rst b/Documentation/cpu-freq/index.rst index 1bff3dfddd23..53501c58f41d 100644 --- a/Documentation/cpu-freq/index.rst +++ b/Documentation/cpu-freq/index.rst @@ -14,6 +14,8 @@ Author: Dominik Brodowski .. toctree:: :maxdepth: 1 + core + Mailing List ------------ There is a CPU frequency changing CVS commit and general list where -- cgit v1.2.3 From 8f92058987a1e3ec851218420ac1ef554aca41fe Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 3 Mar 2020 14:52:05 +0100 Subject: docs: cpu-freq: convert cpu-drivers.txt to ReST - Add a SPDX header; - Add a document title, based on the original contents of cpu-freq/index.txt; - Use lists where needed; - Comment out the existing text-only index; - Adjust some title marks; - Add table markups; - Add notes markups; - Mark literal blocks as such; - use ``foo`` for literal texts; - Some whitespace fixes and new line breaks; - Add it to cpu-freq/index.rst. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/cpu-drivers.rst | 292 ++++++++++++++++++++++++++++++++ Documentation/cpu-freq/cpu-drivers.txt | 295 --------------------------------- Documentation/cpu-freq/index.rst | 1 + 3 files changed, 293 insertions(+), 295 deletions(-) create mode 100644 Documentation/cpu-freq/cpu-drivers.rst delete mode 100644 Documentation/cpu-freq/cpu-drivers.txt (limited to 'Documentation') diff --git a/Documentation/cpu-freq/cpu-drivers.rst b/Documentation/cpu-freq/cpu-drivers.rst new file mode 100644 index 000000000000..a697278ce190 --- /dev/null +++ b/Documentation/cpu-freq/cpu-drivers.rst @@ -0,0 +1,292 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=============================================== +How to Implement a new CPUFreq Processor Driver +=============================================== + +Authors: + + + - Dominik Brodowski + - Rafael J. Wysocki + - Viresh Kumar + +.. Contents + + 1. What To Do? + 1.1 Initialization + 1.2 Per-CPU Initialization + 1.3 verify + 1.4 target/target_index or setpolicy? + 1.5 target/target_index + 1.6 setpolicy + 1.7 get_intermediate and target_intermediate + 2. Frequency Table Helpers + + + +1. What To Do? +============== + +So, you just got a brand-new CPU / chipset with datasheets and want to +add cpufreq support for this CPU / chipset? Great. Here are some hints +on what is necessary: + + +1.1 Initialization +------------------ + +First of all, in an __initcall level 7 (module_init()) or later +function check whether this kernel runs on the right CPU and the right +chipset. If so, register a struct cpufreq_driver with the CPUfreq core +using cpufreq_register_driver() + +What shall this struct cpufreq_driver contain? + + .name - The name of this driver. + + .init - A pointer to the per-policy initialization function. + + .verify - A pointer to a "verification" function. + + .setpolicy _or_ .fast_switch _or_ .target _or_ .target_index - See + below on the differences. + +And optionally + + .flags - Hints for the cpufreq core. + + .driver_data - cpufreq driver specific data. + + .resolve_freq - Returns the most appropriate frequency for a target + frequency. Doesn't change the frequency though. + + .get_intermediate and target_intermediate - Used to switch to stable + frequency while changing CPU frequency. + + .get - Returns current frequency of the CPU. + + .bios_limit - Returns HW/BIOS max frequency limitations for the CPU. + + .exit - A pointer to a per-policy cleanup function called during + CPU_POST_DEAD phase of cpu hotplug process. + + .stop_cpu - A pointer to a per-policy stop function called during + CPU_DOWN_PREPARE phase of cpu hotplug process. + + .suspend - A pointer to a per-policy suspend function which is called + with interrupts disabled and _after_ the governor is stopped for the + policy. + + .resume - A pointer to a per-policy resume function which is called + with interrupts disabled and _before_ the governor is started again. + + .ready - A pointer to a per-policy ready function which is called after + the policy is fully initialized. + + .attr - A pointer to a NULL-terminated list of "struct freq_attr" which + allow to export values to sysfs. + + .boost_enabled - If set, boost frequencies are enabled. + + .set_boost - A pointer to a per-policy function to enable/disable boost + frequencies. + + +1.2 Per-CPU Initialization +-------------------------- + +Whenever a new CPU is registered with the device model, or after the +cpufreq driver registers itself, the per-policy initialization function +cpufreq_driver.init is called if no cpufreq policy existed for the CPU. +Note that the .init() and .exit() routines are called only once for the +policy and not for each CPU managed by the policy. It takes a ``struct +cpufreq_policy *policy`` as argument. What to do now? + +If necessary, activate the CPUfreq support on your CPU. + +Then, the driver must fill in the following values: + ++-----------------------------------+--------------------------------------+ +|policy->cpuinfo.min_freq _and_ | | +|policy->cpuinfo.max_freq | the minimum and maximum frequency | +| | (in kHz) which is supported by | +| | this CPU | ++-----------------------------------+--------------------------------------+ +|policy->cpuinfo.transition_latency | the time it takes on this CPU to | +| | switch between two frequencies in | +| | nanoseconds (if appropriate, else | +| | specify CPUFREQ_ETERNAL) | ++-----------------------------------+--------------------------------------+ +|policy->cur | The current operating frequency of | +| | this CPU (if appropriate) | ++-----------------------------------+--------------------------------------+ +|policy->min, | | +|policy->max, | | +|policy->policy and, if necessary, | | +|policy->governor | must contain the "default policy" for| +| | this CPU. A few moments later, | +| | cpufreq_driver.verify and either | +| | cpufreq_driver.setpolicy or | +| | cpufreq_driver.target/target_index is| +| | called with these values. | ++-----------------------------------+--------------------------------------+ +|policy->cpus | Update this with the masks of the | +| | (online + offline) CPUs that do DVFS | +| | along with this CPU (i.e. that share| +| | clock/voltage rails with it). | ++-----------------------------------+--------------------------------------+ + +For setting some of these values (cpuinfo.min[max]_freq, policy->min[max]), the +frequency table helpers might be helpful. See the section 2 for more information +on them. + + +1.3 verify +---------- + +When the user decides a new policy (consisting of +"policy,governor,min,max") shall be set, this policy must be validated +so that incompatible values can be corrected. For verifying these +values cpufreq_verify_within_limits(``struct cpufreq_policy *policy``, +``unsigned int min_freq``, ``unsigned int max_freq``) function might be helpful. +See section 2 for details on frequency table helpers. + +You need to make sure that at least one valid frequency (or operating +range) is within policy->min and policy->max. If necessary, increase +policy->max first, and only if this is no solution, decrease policy->min. + + +1.4 target or target_index or setpolicy or fast_switch? +------------------------------------------------------- + +Most cpufreq drivers or even most cpu frequency scaling algorithms +only allow the CPU frequency to be set to predefined fixed values. For +these, you use the ->target(), ->target_index() or ->fast_switch() +callbacks. + +Some cpufreq capable processors switch the frequency between certain +limits on their own. These shall use the ->setpolicy() callback. + + +1.5. target/target_index +------------------------ + +The target_index call has two arguments: ``struct cpufreq_policy *policy``, +and ``unsigned int`` index (into the exposed frequency table). + +The CPUfreq driver must set the new frequency when called here. The +actual frequency must be determined by freq_table[index].frequency. + +It should always restore to earlier frequency (i.e. policy->restore_freq) in +case of errors, even if we switched to intermediate frequency earlier. + +Deprecated +---------- +The target call has three arguments: ``struct cpufreq_policy *policy``, +unsigned int target_frequency, unsigned int relation. + +The CPUfreq driver must set the new frequency when called here. The +actual frequency must be determined using the following rules: + +- keep close to "target_freq" +- policy->min <= new_freq <= policy->max (THIS MUST BE VALID!!!) +- if relation==CPUFREQ_REL_L, try to select a new_freq higher than or equal + target_freq. ("L for lowest, but no lower than") +- if relation==CPUFREQ_REL_H, try to select a new_freq lower than or equal + target_freq. ("H for highest, but no higher than") + +Here again the frequency table helper might assist you - see section 2 +for details. + +1.6. fast_switch +---------------- + +This function is used for frequency switching from scheduler's context. +Not all drivers are expected to implement it, as sleeping from within +this callback isn't allowed. This callback must be highly optimized to +do switching as fast as possible. + +This function has two arguments: ``struct cpufreq_policy *policy`` and +``unsigned int target_frequency``. + + +1.7 setpolicy +------------- + +The setpolicy call only takes a ``struct cpufreq_policy *policy`` as +argument. You need to set the lower limit of the in-processor or +in-chipset dynamic frequency switching to policy->min, the upper limit +to policy->max, and -if supported- select a performance-oriented +setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a +powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check +the reference implementation in drivers/cpufreq/longrun.c + +1.8 get_intermediate and target_intermediate +-------------------------------------------- + +Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION unset. + +get_intermediate should return a stable intermediate frequency platform wants to +switch to, and target_intermediate() should set CPU to that frequency, before +jumping to the frequency corresponding to 'index'. Core will take care of +sending notifications and driver doesn't have to handle them in +target_intermediate() or target_index(). + +Drivers can return '0' from get_intermediate() in case they don't wish to switch +to intermediate frequency for some target frequency. In that case core will +directly call ->target_index(). + +NOTE: ->target_index() should restore to policy->restore_freq in case of +failures as core would send notifications for that. + + +2. Frequency Table Helpers +========================== + +As most cpufreq processors only allow for being set to a few specific +frequencies, a "frequency table" with some functions might assist in +some work of the processor driver. Such a "frequency table" consists of +an array of struct cpufreq_frequency_table entries, with driver specific +values in "driver_data", the corresponding frequency in "frequency" and +flags set. At the end of the table, you need to add a +cpufreq_frequency_table entry with frequency set to CPUFREQ_TABLE_END. +And if you want to skip one entry in the table, set the frequency to +CPUFREQ_ENTRY_INVALID. The entries don't need to be in sorted in any +particular order, but if they are cpufreq core will do DVFS a bit +quickly for them as search for best match is faster. + +The cpufreq table is verified automatically by the core if the policy contains a +valid pointer in its policy->freq_table field. + +cpufreq_frequency_table_verify() assures that at least one valid +frequency is within policy->min and policy->max, and all other criteria +are met. This is helpful for the ->verify call. + +cpufreq_frequency_table_target() is the corresponding frequency table +helper for the ->target stage. Just pass the values to this function, +and this function returns the of the frequency table entry which +contains the frequency the CPU shall be set to. + +The following macros can be used as iterators over cpufreq_frequency_table: + +cpufreq_for_each_entry(pos, table) - iterates over all entries of frequency +table. + +cpufreq_for_each_valid_entry(pos, table) - iterates over all entries, +excluding CPUFREQ_ENTRY_INVALID frequencies. +Use arguments "pos" - a ``cpufreq_frequency_table *`` as a loop cursor and +"table" - the ``cpufreq_frequency_table *`` you want to iterate over. + +For example:: + + struct cpufreq_frequency_table *pos, *driver_freq_table; + + cpufreq_for_each_entry(pos, driver_freq_table) { + /* Do something with pos */ + pos->frequency = ... + } + +If you need to work with the position of pos within driver_freq_table, +do not subtract the pointers, as it is quite costly. Instead, use the +macros cpufreq_for_each_entry_idx() and cpufreq_for_each_valid_entry_idx(). diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt deleted file mode 100644 index 6e353d00cdc6..000000000000 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ /dev/null @@ -1,295 +0,0 @@ - CPU frequency and voltage scaling code in the Linux(TM) kernel - - - L i n u x C P U F r e q - - C P U D r i v e r s - - - information for developers - - - - Dominik Brodowski - Rafael J. Wysocki - Viresh Kumar - - - - Clock scaling allows you to change the clock speed of the CPUs on the - fly. This is a nice method to save battery power, because the lower - the clock speed, the less power the CPU consumes. - - -Contents: ---------- -1. What To Do? -1.1 Initialization -1.2 Per-CPU Initialization -1.3 verify -1.4 target/target_index or setpolicy? -1.5 target/target_index -1.6 setpolicy -1.7 get_intermediate and target_intermediate -2. Frequency Table Helpers - - - -1. What To Do? -============== - -So, you just got a brand-new CPU / chipset with datasheets and want to -add cpufreq support for this CPU / chipset? Great. Here are some hints -on what is necessary: - - -1.1 Initialization ------------------- - -First of all, in an __initcall level 7 (module_init()) or later -function check whether this kernel runs on the right CPU and the right -chipset. If so, register a struct cpufreq_driver with the CPUfreq core -using cpufreq_register_driver() - -What shall this struct cpufreq_driver contain? - - .name - The name of this driver. - - .init - A pointer to the per-policy initialization function. - - .verify - A pointer to a "verification" function. - - .setpolicy _or_ .fast_switch _or_ .target _or_ .target_index - See - below on the differences. - -And optionally - - .flags - Hints for the cpufreq core. - - .driver_data - cpufreq driver specific data. - - .resolve_freq - Returns the most appropriate frequency for a target - frequency. Doesn't change the frequency though. - - .get_intermediate and target_intermediate - Used to switch to stable - frequency while changing CPU frequency. - - .get - Returns current frequency of the CPU. - - .bios_limit - Returns HW/BIOS max frequency limitations for the CPU. - - .exit - A pointer to a per-policy cleanup function called during - CPU_POST_DEAD phase of cpu hotplug process. - - .stop_cpu - A pointer to a per-policy stop function called during - CPU_DOWN_PREPARE phase of cpu hotplug process. - - .suspend - A pointer to a per-policy suspend function which is called - with interrupts disabled and _after_ the governor is stopped for the - policy. - - .resume - A pointer to a per-policy resume function which is called - with interrupts disabled and _before_ the governor is started again. - - .ready - A pointer to a per-policy ready function which is called after - the policy is fully initialized. - - .attr - A pointer to a NULL-terminated list of "struct freq_attr" which - allow to export values to sysfs. - - .boost_enabled - If set, boost frequencies are enabled. - - .set_boost - A pointer to a per-policy function to enable/disable boost - frequencies. - - -1.2 Per-CPU Initialization --------------------------- - -Whenever a new CPU is registered with the device model, or after the -cpufreq driver registers itself, the per-policy initialization function -cpufreq_driver.init is called if no cpufreq policy existed for the CPU. -Note that the .init() and .exit() routines are called only once for the -policy and not for each CPU managed by the policy. It takes a struct -cpufreq_policy *policy as argument. What to do now? - -If necessary, activate the CPUfreq support on your CPU. - -Then, the driver must fill in the following values: - -policy->cpuinfo.min_freq _and_ -policy->cpuinfo.max_freq - the minimum and maximum frequency - (in kHz) which is supported by - this CPU -policy->cpuinfo.transition_latency the time it takes on this CPU to - switch between two frequencies in - nanoseconds (if appropriate, else - specify CPUFREQ_ETERNAL) - -policy->cur The current operating frequency of - this CPU (if appropriate) -policy->min, -policy->max, -policy->policy and, if necessary, -policy->governor must contain the "default policy" for - this CPU. A few moments later, - cpufreq_driver.verify and either - cpufreq_driver.setpolicy or - cpufreq_driver.target/target_index is called - with these values. -policy->cpus Update this with the masks of the - (online + offline) CPUs that do DVFS - along with this CPU (i.e. that share - clock/voltage rails with it). - -For setting some of these values (cpuinfo.min[max]_freq, policy->min[max]), the -frequency table helpers might be helpful. See the section 2 for more information -on them. - - -1.3 verify ----------- - -When the user decides a new policy (consisting of -"policy,governor,min,max") shall be set, this policy must be validated -so that incompatible values can be corrected. For verifying these -values cpufreq_verify_within_limits(struct cpufreq_policy *policy, -unsigned int min_freq, unsigned int max_freq) function might be helpful. -See section 2 for details on frequency table helpers. - -You need to make sure that at least one valid frequency (or operating -range) is within policy->min and policy->max. If necessary, increase -policy->max first, and only if this is no solution, decrease policy->min. - - -1.4 target or target_index or setpolicy or fast_switch? -------------------------------------------------------- - -Most cpufreq drivers or even most cpu frequency scaling algorithms -only allow the CPU frequency to be set to predefined fixed values. For -these, you use the ->target(), ->target_index() or ->fast_switch() -callbacks. - -Some cpufreq capable processors switch the frequency between certain -limits on their own. These shall use the ->setpolicy() callback. - - -1.5. target/target_index ------------------------- - -The target_index call has two arguments: struct cpufreq_policy *policy, -and unsigned int index (into the exposed frequency table). - -The CPUfreq driver must set the new frequency when called here. The -actual frequency must be determined by freq_table[index].frequency. - -It should always restore to earlier frequency (i.e. policy->restore_freq) in -case of errors, even if we switched to intermediate frequency earlier. - -Deprecated: ----------- -The target call has three arguments: struct cpufreq_policy *policy, -unsigned int target_frequency, unsigned int relation. - -The CPUfreq driver must set the new frequency when called here. The -actual frequency must be determined using the following rules: - -- keep close to "target_freq" -- policy->min <= new_freq <= policy->max (THIS MUST BE VALID!!!) -- if relation==CPUFREQ_REL_L, try to select a new_freq higher than or equal - target_freq. ("L for lowest, but no lower than") -- if relation==CPUFREQ_REL_H, try to select a new_freq lower than or equal - target_freq. ("H for highest, but no higher than") - -Here again the frequency table helper might assist you - see section 2 -for details. - -1.6. fast_switch ----------------- - -This function is used for frequency switching from scheduler's context. -Not all drivers are expected to implement it, as sleeping from within -this callback isn't allowed. This callback must be highly optimized to -do switching as fast as possible. - -This function has two arguments: struct cpufreq_policy *policy and -unsigned int target_frequency. - - -1.7 setpolicy -------------- - -The setpolicy call only takes a struct cpufreq_policy *policy as -argument. You need to set the lower limit of the in-processor or -in-chipset dynamic frequency switching to policy->min, the upper limit -to policy->max, and -if supported- select a performance-oriented -setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a -powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check -the reference implementation in drivers/cpufreq/longrun.c - -1.8 get_intermediate and target_intermediate --------------------------------------------- - -Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION unset. - -get_intermediate should return a stable intermediate frequency platform wants to -switch to, and target_intermediate() should set CPU to that frequency, before -jumping to the frequency corresponding to 'index'. Core will take care of -sending notifications and driver doesn't have to handle them in -target_intermediate() or target_index(). - -Drivers can return '0' from get_intermediate() in case they don't wish to switch -to intermediate frequency for some target frequency. In that case core will -directly call ->target_index(). - -NOTE: ->target_index() should restore to policy->restore_freq in case of -failures as core would send notifications for that. - - -2. Frequency Table Helpers -========================== - -As most cpufreq processors only allow for being set to a few specific -frequencies, a "frequency table" with some functions might assist in -some work of the processor driver. Such a "frequency table" consists of -an array of struct cpufreq_frequency_table entries, with driver specific -values in "driver_data", the corresponding frequency in "frequency" and -flags set. At the end of the table, you need to add a -cpufreq_frequency_table entry with frequency set to CPUFREQ_TABLE_END. -And if you want to skip one entry in the table, set the frequency to -CPUFREQ_ENTRY_INVALID. The entries don't need to be in sorted in any -particular order, but if they are cpufreq core will do DVFS a bit -quickly for them as search for best match is faster. - -The cpufreq table is verified automatically by the core if the policy contains a -valid pointer in its policy->freq_table field. - -cpufreq_frequency_table_verify() assures that at least one valid -frequency is within policy->min and policy->max, and all other criteria -are met. This is helpful for the ->verify call. - -cpufreq_frequency_table_target() is the corresponding frequency table -helper for the ->target stage. Just pass the values to this function, -and this function returns the of the frequency table entry which -contains the frequency the CPU shall be set to. - -The following macros can be used as iterators over cpufreq_frequency_table: - -cpufreq_for_each_entry(pos, table) - iterates over all entries of frequency -table. - -cpufreq_for_each_valid_entry(pos, table) - iterates over all entries, -excluding CPUFREQ_ENTRY_INVALID frequencies. -Use arguments "pos" - a cpufreq_frequency_table * as a loop cursor and -"table" - the cpufreq_frequency_table * you want to iterate over. - -For example: - - struct cpufreq_frequency_table *pos, *driver_freq_table; - - cpufreq_for_each_entry(pos, driver_freq_table) { - /* Do something with pos */ - pos->frequency = ... - } - -If you need to work with the position of pos within driver_freq_table, -do not subtract the pointers, as it is quite costly. Instead, use the -macros cpufreq_for_each_entry_idx() and cpufreq_for_each_valid_entry_idx(). diff --git a/Documentation/cpu-freq/index.rst b/Documentation/cpu-freq/index.rst index 53501c58f41d..fcc58b2eee01 100644 --- a/Documentation/cpu-freq/index.rst +++ b/Documentation/cpu-freq/index.rst @@ -15,6 +15,7 @@ Author: Dominik Brodowski :maxdepth: 1 core + cpu-drivers Mailing List ------------ -- cgit v1.2.3 From aadfa206e9bb6fcee0b9fcd2fc39804026cdefcc Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 3 Mar 2020 14:52:06 +0100 Subject: docs: cpu-freq: convert cpufreq-stats.txt to ReST - Add a SPDX header; - Add a document title, based on the original contents of cpu-freq/index.txt; - Use lists where needed; - Comment out the existing text-only index; - Adjust some title marks; - Use bold on some places; - Mark literal blocks as such; - Some whitespace fixes and new line breaks; - Add it to cpu-freq/index.rst. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/cpufreq-stats.rst | 136 +++++++++++++++++++++++++++++++ Documentation/cpu-freq/cpufreq-stats.txt | 127 ----------------------------- Documentation/cpu-freq/index.rst | 1 + 3 files changed, 137 insertions(+), 127 deletions(-) create mode 100644 Documentation/cpu-freq/cpufreq-stats.rst delete mode 100644 Documentation/cpu-freq/cpufreq-stats.txt (limited to 'Documentation') diff --git a/Documentation/cpu-freq/cpufreq-stats.rst b/Documentation/cpu-freq/cpufreq-stats.rst new file mode 100644 index 000000000000..9ad695b1c7db --- /dev/null +++ b/Documentation/cpu-freq/cpufreq-stats.rst @@ -0,0 +1,136 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================================== +General Description of sysfs CPUFreq Stats +========================================== + +information for users + + +Author: Venkatesh Pallipadi + +.. Contents + + 1. Introduction + 2. Statistics Provided (with example) + 3. Configuring cpufreq-stats + + +1. Introduction +=============== + +cpufreq-stats is a driver that provides CPU frequency statistics for each CPU. +These statistics are provided in /sysfs as a bunch of read_only interfaces. This +interface (when configured) will appear in a separate directory under cpufreq +in /sysfs (/devices/system/cpu/cpuX/cpufreq/stats/) for each CPU. +Various statistics will form read_only files under this directory. + +This driver is designed to be independent of any particular cpufreq_driver +that may be running on your CPU. So, it will work with any cpufreq_driver. + + +2. Statistics Provided (with example) +===================================== + +cpufreq stats provides following statistics (explained in detail below). + +- time_in_state +- total_trans +- trans_table + +All the statistics will be from the time the stats driver has been inserted +(or the time the stats were reset) to the time when a read of a particular +statistic is done. Obviously, stats driver will not have any information +about the frequency transitions before the stats driver insertion. + +:: + + :/sys/devices/system/cpu/cpu0/cpufreq/stats # ls -l + total 0 + drwxr-xr-x 2 root root 0 May 14 16:06 . + drwxr-xr-x 3 root root 0 May 14 15:58 .. + --w------- 1 root root 4096 May 14 16:06 reset + -r--r--r-- 1 root root 4096 May 14 16:06 time_in_state + -r--r--r-- 1 root root 4096 May 14 16:06 total_trans + -r--r--r-- 1 root root 4096 May 14 16:06 trans_table + +- **reset** + +Write-only attribute that can be used to reset the stat counters. This can be +useful for evaluating system behaviour under different governors without the +need for a reboot. + +- **time_in_state** + +This gives the amount of time spent in each of the frequencies supported by +this CPU. The cat output will have "