summaryrefslogtreecommitdiff
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/testing/sysfs-ibft10
-rw-r--r--Documentation/ABI/testing/sysfs-platform-hidma9
-rw-r--r--Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl29
-rw-r--r--Documentation/DocBook/80211.tmpl2
-rw-r--r--Documentation/DocBook/crypto-API.tmpl6
-rw-r--r--Documentation/DocBook/device-drivers.tmpl1
-rw-r--r--Documentation/DocBook/media/dvb/net.xml2
-rw-r--r--Documentation/DocBook/media/v4l/compat.xml38
-rw-r--r--Documentation/DocBook/media/v4l/controls.xml31
-rw-r--r--Documentation/DocBook/media/v4l/dev-sdr.xml6
-rw-r--r--Documentation/DocBook/media/v4l/dev-subdev.xml6
-rw-r--r--Documentation/DocBook/media/v4l/io.xml6
-rw-r--r--Documentation/DocBook/media/v4l/selection-api.xml9
-rw-r--r--Documentation/DocBook/media/v4l/subdev-formats.xml6
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-create-bufs.xml6
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-dv-timings-cap.xml18
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-dv-timings.xml11
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-freq-bands.xml6
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-expbuf.xml6
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-edid.xml10
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-selection.xml6
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-prepare-buf.xml6
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-query-dv-timings.xml6
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-streamon.xml8
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-interval.xml6
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-size.xml6
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-enum-mbus-code.xml6
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-g-fmt.xml6
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-g-frame-interval.xml6
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-g-selection.xml6
-rw-r--r--Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg474
-rw-r--r--Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg499
-rw-r--r--Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg695
-rw-r--r--Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg741
-rw-r--r--Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg858
-rw-r--r--Documentation/RCU/Design/Data-Structures/Data-Structures.html1333
-rw-r--r--Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg939
-rw-r--r--Documentation/RCU/Design/Data-Structures/TreeLevel.svg828
-rw-r--r--Documentation/RCU/Design/Data-Structures/TreeMapping.svg305
-rw-r--r--Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg380
-rw-r--r--Documentation/RCU/Design/Data-Structures/blkd_task.svg843
-rw-r--r--Documentation/RCU/Design/Data-Structures/nxtlist.svg396
-rw-r--r--Documentation/RCU/Design/Requirements/2013-08-is-it-dead.pngbin100825 -> 0 bytes
-rw-r--r--Documentation/RCU/Design/Requirements/RCUApplicability.svg237
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.html941
-rw-r--r--Documentation/RCU/Design/Requirements/Requirements.htmlx2741
-rwxr-xr-xDocumentation/RCU/Design/htmlqqz.sh108
-rw-r--r--Documentation/RCU/trace.txt10
-rw-r--r--Documentation/RCU/whatisRCU.txt22
-rw-r--r--Documentation/accounting/getdelays.c5
-rw-r--r--Documentation/acpi/initrd_table_override.txt65
-rw-r--r--Documentation/arm64/booting.txt4
-rw-r--r--Documentation/arm64/silicon-errata.txt2
-rw-r--r--Documentation/block/queue-sysfs.txt9
-rw-r--r--Documentation/block/writeback_cache_control.txt4
-rw-r--r--Documentation/device-mapper/cache-policies.txt34
-rw-r--r--Documentation/device-mapper/statistics.txt2
-rw-r--r--Documentation/devicetree/bindings/arc/archs-pct.txt2
-rw-r--r--Documentation/devicetree/bindings/arc/eznps.txt7
-rw-r--r--Documentation/devicetree/bindings/arc/pct.txt2
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt50
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic.txt3
-rw-r--r--Documentation/devicetree/bindings/arm/arm-boards8
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-at91.txt6
-rw-r--r--Documentation/devicetree/bindings/arm/cpus.txt1
-rw-r--r--Documentation/devicetree/bindings/arm/fsl.txt4
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt20
-rw-r--r--Documentation/devicetree/bindings/arm/omap/omap.txt6
-rw-r--r--Documentation/devicetree/bindings/arm/oxnas.txt9
-rw-r--r--Documentation/devicetree/bindings/arm/pmu.txt3
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/samsung-boards.txt2
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt92
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-platform.txt4
-rw-r--r--Documentation/devicetree/bindings/btmrvl.txt29
-rw-r--r--Documentation/devicetree/bindings/clock/microchip,pic32.txt39
-rw-r--r--Documentation/devicetree/bindings/clock/qca,ath79-pll.txt6
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-imx-scc.txt21
-rw-r--r--Documentation/devicetree/bindings/crypto/samsung-sss.txt6
-rw-r--r--Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt26
-rw-r--r--Documentation/devicetree/bindings/devfreq/exynos-bus.txt409
-rw-r--r--Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt26
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt27
-rw-r--r--Documentation/devicetree/bindings/dma/mv-xor.txt5
-rw-r--r--Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.txt55
-rw-r--r--Documentation/devicetree/bindings/dma/qcom_bam_dma.txt2
-rw-r--r--Documentation/devicetree/bindings/dma/snps-dma.txt11
-rw-r--r--Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt36
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-74x164.txt4
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt20
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-xlp.txt3
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio.txt26
-rw-r--r--Documentation/devicetree/bindings/gpio/nvidia,tegra186-gpio.txt161
-rw-r--r--Documentation/devicetree/bindings/gpio/wd,mbl-gpio.txt38
-rw-r--r--Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt37
-rw-r--r--Documentation/devicetree/bindings/hwmon/ltc2978.txt1
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-octeon.txt6
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-rcar.txt3
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-rk3x.txt4
-rw-r--r--Documentation/devicetree/bindings/input/gpio-keys.txt10
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/brcm,iproc-touchscreen.txt23
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt34
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.txt2
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt4
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm6345-l1-intc.txt57
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt17
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt30
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/nxp,lpc3220-mic.txt70
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.txt1
-rw-r--r--Documentation/devicetree/bindings/leds/common.txt3
-rw-r--r--Documentation/devicetree/bindings/leds/leds-gpio.txt2
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adv7180.txt29
-rw-r--r--Documentation/devicetree/bindings/media/rcar_vin.txt12
-rw-r--r--Documentation/devicetree/bindings/media/xilinx/video.txt2
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/exynos-srom.txt79
-rw-r--r--Documentation/devicetree/bindings/mips/brcm/soc.txt3
-rw-r--r--Documentation/devicetree/bindings/mips/cavium/ciu3.txt27
-rw-r--r--Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt81
-rw-r--r--Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt1
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-st.txt4
-rw-r--r--Documentation/devicetree/bindings/mmc/tmio_mmc.txt3
-rw-r--r--Documentation/devicetree/bindings/mmc/usdhi6rol0.txt6
-rw-r--r--Documentation/devicetree/bindings/mtd/arm-versatile.txt20
-rw-r--r--Documentation/devicetree/bindings/mtd/fsl-quadspi.txt3
-rw-r--r--Documentation/devicetree/bindings/net/apm-xgene-enet.txt2
-rw-r--r--Documentation/devicetree/bindings/net/cpsw.txt6
-rw-r--r--Documentation/devicetree/bindings/net/dsa/dsa.txt2
-rw-r--r--Documentation/devicetree/bindings/net/dsa/marvell.txt35
-rw-r--r--Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt57
-rw-r--r--Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt30
-rw-r--r--Documentation/devicetree/bindings/net/marvell-bt-sd8xxx.txt56
-rw-r--r--Documentation/devicetree/bindings/net/mediatek-net.txt7
-rw-r--r--Documentation/devicetree/bindings/net/microchip,enc28j60.txt59
-rw-r--r--Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt31
-rw-r--r--Documentation/devicetree/bindings/net/phy.txt3
-rw-r--r--Documentation/devicetree/bindings/net/stmmac.txt2
-rw-r--r--Documentation/devicetree/bindings/net/wireless/marvell-sd8xxx.txt63
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt23
-rw-r--r--Documentation/devicetree/bindings/numa.txt275
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt18
-rw-r--r--Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt224
-rw-r--r--Documentation/devicetree/bindings/pci/pci-armada8k.txt38
-rw-r--r--Documentation/devicetree/bindings/pci/pci-keystone.txt1
-rw-r--r--Documentation/devicetree/bindings/phy/nvidia,tegra124-xusb-padctl.txt733
-rw-r--r--Documentation/devicetree/bindings/phy/phy-lpc18xx-usb-otg.txt2
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-dp-phy.txt18
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-emmc-phy.txt22
-rw-r--r--Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt12
-rw-r--r--Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt38
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt6
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt4
-rw-r--r--Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt48
-rw-r--r--Documentation/devicetree/bindings/power/reset/gpio-poweroff.txt (renamed from Documentation/devicetree/bindings/gpio/gpio-poweroff.txt)0
-rw-r--r--Documentation/devicetree/bindings/power/reset/gpio-restart.txt (renamed from Documentation/devicetree/bindings/gpio/gpio-restart.txt)0
-rw-r--r--Documentation/devicetree/bindings/power/rockchip-io-domain.txt4
-rw-r--r--Documentation/devicetree/bindings/regmap/regmap.txt59
-rw-r--r--Documentation/devicetree/bindings/regulator/max8973-regulator.txt7
-rw-r--r--Documentation/devicetree/bindings/regulator/pv88080.txt49
-rw-r--r--Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt37
-rw-r--r--Documentation/devicetree/bindings/regulator/regulator-max77620.txt22
-rw-r--r--Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt10
-rw-r--r--Documentation/devicetree/bindings/regulator/twl-regulator.txt6
-rw-r--r--Documentation/devicetree/bindings/reset/oxnas,reset.txt58
-rw-r--r--Documentation/devicetree/bindings/rng/hisi-rng.txt12
-rw-r--r--Documentation/devicetree/bindings/rtc/s3c-rtc.txt7
-rw-r--r--Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt29
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/auxadc.txt21
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/pwrap.txt1
-rw-r--r--Documentation/devicetree/bindings/soc/rockchip/grf.txt35
-rw-r--r--Documentation/devicetree/bindings/soc/rockchip/power_domain.txt47
-rw-r--r--Documentation/devicetree/bindings/sound/davinci-mcbsp.txt51
-rw-r--r--Documentation/devicetree/bindings/sound/fsl-sai.txt9
-rw-r--r--Documentation/devicetree/bindings/sound/pcm5102a.txt13
-rw-r--r--Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt5
-rw-r--r--Documentation/devicetree/bindings/spi/ti_qspi.txt7
-rw-r--r--Documentation/devicetree/bindings/timer/arm,mps2-timer.txt28
-rw-r--r--Documentation/devicetree/bindings/timer/ezchip,nps400-timer.txt15
-rw-r--r--Documentation/devicetree/bindings/timer/snps,arc-timer.txt31
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt14
-rw-r--r--Documentation/devicetree/bindings/timer/snps,archs-rtc.txt14
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.txt120
-rw-r--r--Documentation/devicetree/bindings/usb/usb-xhci.txt1
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.txt11
-rw-r--r--Documentation/devicetree/bindings/watchdog/microchip,pic32-dmt.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/microchip,pic32-wdt.txt18
-rw-r--r--Documentation/driver-model/devres.txt2
-rw-r--r--Documentation/filesystems/Locking2
-rw-r--r--Documentation/filesystems/cramfs.txt2
-rw-r--r--Documentation/filesystems/porting53
-rw-r--r--Documentation/filesystems/tmpfs.txt2
-rw-r--r--Documentation/filesystems/vfs.txt6
-rw-r--r--Documentation/gpio/driver.txt97
-rw-r--r--Documentation/hwmon/fam15h_power65
-rw-r--r--Documentation/hwmon/it8715
-rw-r--r--Documentation/hwmon/max3172234
-rw-r--r--Documentation/i2c/i2c-topology370
-rw-r--r--Documentation/input/event-codes.txt4
-rw-r--r--Documentation/kernel-parameters.txt81
-rw-r--r--Documentation/livepatch/livepatch.txt394
-rw-r--r--Documentation/livepatch/module-elf-format.txt311
-rw-r--r--Documentation/locking/lockdep-design.txt4
-rw-r--r--Documentation/md-cluster.txt6
-rw-r--r--Documentation/memory-barriers.txt117
-rw-r--r--Documentation/networking/altera_tse.txt4
-rw-r--r--Documentation/networking/bonding.txt4
-rw-r--r--Documentation/networking/checksum-offloads.txt14
-rw-r--r--Documentation/networking/dsa/bcm_sf2.txt2
-rw-r--r--Documentation/networking/dsa/dsa.txt20
-rw-r--r--Documentation/networking/filter.txt101
-rw-r--r--Documentation/networking/gen_stats.txt6
-rw-r--r--Documentation/networking/ip-sysctl.txt10
-rw-r--r--Documentation/networking/ipvlan.txt6
-rw-r--r--Documentation/networking/mac80211-injection.txt17
-rw-r--r--Documentation/networking/netdev-features.txt10
-rw-r--r--Documentation/networking/netdevices.txt9
-rw-r--r--Documentation/networking/pktgen.txt6
-rw-r--r--Documentation/networking/segmentation-offloads.txt130
-rw-r--r--Documentation/networking/stmmac.txt44
-rw-r--r--Documentation/networking/switchdev.txt30
-rw-r--r--Documentation/networking/timestamping.txt48
-rw-r--r--Documentation/networking/vrf.txt2
-rw-r--r--Documentation/networking/xfrm_sync.txt6
-rw-r--r--Documentation/phy.txt16
-rw-r--r--Documentation/power/runtime_pm.txt4
-rw-r--r--Documentation/rpmsg.txt14
-rw-r--r--Documentation/scsi/g_NCR5380.txt17
-rw-r--r--Documentation/scsi/scsi-parameters.txt11
-rw-r--r--Documentation/security/LoadPin.txt17
-rw-r--r--Documentation/security/keys.txt52
-rw-r--r--Documentation/sound/alsa/HD-Audio.txt26
-rw-r--r--Documentation/sound/alsa/compress_offload.txt4
-rw-r--r--Documentation/sound/alsa/soc/dapm.txt2
-rw-r--r--Documentation/sound/alsa/soc/overview.txt2
-rw-r--r--Documentation/sound/alsa/timestamping.txt2
-rw-r--r--Documentation/sysctl/kernel.txt16
-rw-r--r--Documentation/sysctl/net.txt11
-rw-r--r--Documentation/sysctl/vm.txt19
-rw-r--r--Documentation/trace/events.txt1555
-rw-r--r--Documentation/trace/ftrace.txt44
-rw-r--r--Documentation/video4linux/CARDLIST.cx238852
-rw-r--r--Documentation/video4linux/CARDLIST.em28xx12
-rw-r--r--Documentation/video4linux/vivid.txt6
-rw-r--r--Documentation/virtual/kvm/api.txt18
-rw-r--r--Documentation/virtual/kvm/devices/s390_flic.txt14
-rw-r--r--Documentation/x86/pat.txt32
-rw-r--r--Documentation/x86/protection-keys.txt27
-rw-r--r--Documentation/x86/topology.txt208
-rw-r--r--Documentation/x86/x86_64/mm.txt6
248 files changed, 17009 insertions, 4249 deletions
diff --git a/Documentation/ABI/testing/sysfs-ibft b/Documentation/ABI/testing/sysfs-ibft
index cac3930bdb04..7d6725fe6143 100644
--- a/Documentation/ABI/testing/sysfs-ibft
+++ b/Documentation/ABI/testing/sysfs-ibft
@@ -21,3 +21,13 @@ Contact: Konrad Rzeszutek <ketuzsezr@darnok.org>
Description: The /sys/firmware/ibft/ethernetX directory will contain
files that expose the iSCSI Boot Firmware Table NIC data.
Usually this contains the IP address, MAC, and gateway of the NIC.
+
+What: /sys/firmware/ibft/acpi_header
+Date: March 2016
+Contact: David Bond <dbond@suse.com>
+Description: The /sys/firmware/ibft/acpi_header directory will contain files
+ that expose the SIGNATURE, OEM_ID, and OEM_TABLE_ID fields of the
+ acpi table header of the iBFT structure. This will allow for
+ identification of the creator of the table which is useful in
+ determining quirks associated with some adapters when used in
+ hardware vs software iscsi initiator mode.
diff --git a/Documentation/ABI/testing/sysfs-platform-hidma b/Documentation/ABI/testing/sysfs-platform-hidma
new file mode 100644
index 000000000000..d36441538660
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-hidma
@@ -0,0 +1,9 @@
+What: /sys/devices/platform/hidma-*/chid
+ /sys/devices/platform/QCOM8061:*/chid
+Date: Dec 2015
+KernelVersion: 4.4
+Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Description:
+ Contains the ID of the channel within the HIDMA instance.
+ It is used to associate a given HIDMA channel with the
+ priority and weight calls in the management interface.
diff --git a/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl b/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl
index 7ac7d7262bb7..3c3514815cd5 100644
--- a/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl
+++ b/Documentation/ABI/testing/sysfs-platform-i2c-demux-pinctrl
@@ -1,23 +1,18 @@
-What: /sys/devices/platform/<i2c-demux-name>/cur_master
+What: /sys/devices/platform/<i2c-demux-name>/available_masters
Date: January 2016
KernelVersion: 4.6
Contact: Wolfram Sang <wsa@the-dreams.de>
Description:
+ Reading the file will give you a list of masters which can be
+ selected for a demultiplexed bus. The format is
+ "<index>:<name>". Example from a Renesas Lager board:
-This file selects the active I2C master for a demultiplexed bus.
+ 0:/i2c@e6500000 1:/i2c@e6508000
-Write 0 there for the first master, 1 for the second etc. Reading the file will
-give you a list with the active master marked. Example from a Renesas Lager
-board:
-
-root@Lager:~# cat /sys/devices/platform/i2c@8/cur_master
-* 0 - /i2c@9
- 1 - /i2c@e6520000
- 2 - /i2c@e6530000
-
-root@Lager:~# echo 2 > /sys/devices/platform/i2c@8/cur_master
-
-root@Lager:~# cat /sys/devices/platform/i2c@8/cur_master
- 0 - /i2c@9
- 1 - /i2c@e6520000
-* 2 - /i2c@e6530000
+What: /sys/devices/platform/<i2c-demux-name>/current_master
+Date: January 2016
+KernelVersion: 4.6
+Contact: Wolfram Sang <wsa@the-dreams.de>
+Description:
+ This file selects/shows the active I2C master for a demultiplexed
+ bus. It uses the <index> value from the file 'available_masters'.
diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl
index f9b9ad7894f5..5f7c55999c77 100644
--- a/Documentation/DocBook/80211.tmpl
+++ b/Documentation/DocBook/80211.tmpl
@@ -75,7 +75,6 @@
<chapter>
<title>Device registration</title>
!Pinclude/net/cfg80211.h Device registration
-!Finclude/net/cfg80211.h ieee80211_band
!Finclude/net/cfg80211.h ieee80211_channel_flags
!Finclude/net/cfg80211.h ieee80211_channel
!Finclude/net/cfg80211.h ieee80211_rate_flags
@@ -136,6 +135,7 @@
!Finclude/net/cfg80211.h cfg80211_tx_mlme_mgmt
!Finclude/net/cfg80211.h cfg80211_ibss_joined
!Finclude/net/cfg80211.h cfg80211_connect_result
+!Finclude/net/cfg80211.h cfg80211_connect_bss
!Finclude/net/cfg80211.h cfg80211_roamed
!Finclude/net/cfg80211.h cfg80211_disconnected
!Finclude/net/cfg80211.h cfg80211_ready_on_channel
diff --git a/Documentation/DocBook/crypto-API.tmpl b/Documentation/DocBook/crypto-API.tmpl
index 348619fcafb8..d55dc5a39bad 100644
--- a/Documentation/DocBook/crypto-API.tmpl
+++ b/Documentation/DocBook/crypto-API.tmpl
@@ -1936,9 +1936,9 @@ static int test_skcipher(void)
}
req = skcipher_request_alloc(skcipher, GFP_KERNEL);
- if (IS_ERR(req)) {
- pr_info("could not allocate request queue\n");
- ret = PTR_ERR(req);
+ if (!req) {
+ pr_info("could not allocate skcipher request\n");
+ ret = -ENOMEM;
goto out;
}
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl
index 184f3c7b5145..893b2cabf7e4 100644
--- a/Documentation/DocBook/device-drivers.tmpl
+++ b/Documentation/DocBook/device-drivers.tmpl
@@ -233,6 +233,7 @@ X!Isound/sound_firmware.c
!Iinclude/media/v4l2-mediabus.h
!Iinclude/media/v4l2-mem2mem.h
!Iinclude/media/v4l2-of.h
+!Iinclude/media/v4l2-rect.h
!Iinclude/media/v4l2-subdev.h
!Iinclude/media/videobuf2-core.h
!Iinclude/media/videobuf2-v4l2.h
diff --git a/Documentation/DocBook/media/dvb/net.xml b/Documentation/DocBook/media/dvb/net.xml
index d2e44b7e07df..da095ed0b75c 100644
--- a/Documentation/DocBook/media/dvb/net.xml
+++ b/Documentation/DocBook/media/dvb/net.xml
@@ -15,7 +15,7 @@
that are present on the transport stream. This is done through
<constant>/dev/dvb/adapter?/net?</constant> device node.
The data will be available via virtual <constant>dvb?_?</constant>
- network interfaces, and will be controled/routed via the standard
+ network interfaces, and will be controlled/routed via the standard
ip tools (like ip, route, netstat, ifconfig, etc).</para>
<para> Data types and ioctl definitions are defined via
<constant>linux/dvb/net.h</constant> header.</para>
diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml
index 5399e8904715..82fa328abd58 100644
--- a/Documentation/DocBook/media/v4l/compat.xml
+++ b/Documentation/DocBook/media/v4l/compat.xml
@@ -2686,50 +2686,12 @@ and may change in the future.</para>
<itemizedlist>
<listitem>
- <para>Video Output Overlay (OSD) Interface, <xref
- linkend="osd" />.</para>
- </listitem>
- <listitem>
<para>&VIDIOC-DBG-G-REGISTER; and &VIDIOC-DBG-S-REGISTER;
ioctls.</para>
</listitem>
<listitem>
<para>&VIDIOC-DBG-G-CHIP-INFO; ioctl.</para>
</listitem>
- <listitem>
- <para>&VIDIOC-ENUM-DV-TIMINGS;, &VIDIOC-QUERY-DV-TIMINGS; and
- &VIDIOC-DV-TIMINGS-CAP; ioctls.</para>
- </listitem>
- <listitem>
- <para>Flash API. <xref linkend="flash-controls" /></para>
- </listitem>
- <listitem>
- <para>&VIDIOC-CREATE-BUFS; and &VIDIOC-PREPARE-BUF; ioctls.</para>
- </listitem>
- <listitem>
- <para>Selection API. <xref linkend="selection-api" /></para>
- </listitem>
- <listitem>
- <para>Sub-device selection API: &VIDIOC-SUBDEV-G-SELECTION;
- and &VIDIOC-SUBDEV-S-SELECTION; ioctls.</para>
- </listitem>
- <listitem>
- <para>Support for frequency band enumeration: &VIDIOC-ENUM-FREQ-BANDS; ioctl.</para>
- </listitem>
- <listitem>
- <para>Vendor and device specific media bus pixel formats.
- <xref linkend="v4l2-mbus-vendor-spec-fmts" />.</para>
- </listitem>
- <listitem>
- <para>Importing DMABUF file descriptors as a new IO method described
- in <xref linkend="dmabuf" />.</para>
- </listitem>
- <listitem>
- <para>Exporting DMABUF files using &VIDIOC-EXPBUF; ioctl.</para>
- </listitem>
- <listitem>
- <para>Software Defined Radio (SDR) Interface, <xref linkend="sdr" />.</para>
- </listitem>
</itemizedlist>
</section>
diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
index f5f5ce8badac..e2e5484d2d9b 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -4272,13 +4272,6 @@ manually or automatically if set to zero. Unit, range and step are driver-specif
<section id="flash-controls">
<title>Flash Control Reference</title>
- <note>
- <title>Experimental</title>
-
- <para>This is an <link linkend="experimental">experimental</link>
-interface and may change in the future.</para>
- </note>
-
<para>
The V4L2 flash controls are intended to provide generic access
to flash controller devices. Flash controller devices are
@@ -4743,14 +4736,6 @@ interface and may change in the future.</para>
<section id="image-source-controls">
<title>Image Source Control Reference</title>
- <note>
- <title>Experimental</title>
-
- <para>This is an <link
- linkend="experimental">experimental</link> interface and may
- change in the future.</para>
- </note>
-
<para>
The Image Source control class is intended for low-level
control of image source devices such as image sensors. The
@@ -4862,14 +4847,6 @@ interface and may change in the future.</para>
<section id="image-process-controls">
<title>Image Process Control Reference</title>
- <note>
- <title>Experimental</title>
-
- <para>This is an <link
- linkend="experimental">experimental</link> interface and may
- change in the future.</para>
- </note>
-
<para>
The Image Process control class is intended for low-level control of
image processing functions. Unlike
@@ -4955,14 +4932,6 @@ interface and may change in the future.</para>
<section id="dv-controls">
<title>Digital Video Control Reference</title>
- <note>
- <title>Experimental</title>
-
- <para>This is an <link
- linkend="experimental">experimental</link> interface and may
- change in the future.</para>
- </note>
-
<para>
The Digital Video control class is intended to control receivers
and transmitters for <ulink url="http://en.wikipedia.org/wiki/Vga">VGA</ulink>,
diff --git a/Documentation/DocBook/media/v4l/dev-sdr.xml b/Documentation/DocBook/media/v4l/dev-sdr.xml
index a659771f7b7c..6da1157fb5bd 100644
--- a/Documentation/DocBook/media/v4l/dev-sdr.xml
+++ b/Documentation/DocBook/media/v4l/dev-sdr.xml
@@ -1,11 +1,5 @@
<title>Software Defined Radio Interface (SDR)</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental"> experimental </link>
- interface and may change in the future.</para>
- </note>
-
<para>
SDR is an abbreviation of Software Defined Radio, the radio device
which uses application software for modulation or demodulation. This interface
diff --git a/Documentation/DocBook/media/v4l/dev-subdev.xml b/Documentation/DocBook/media/v4l/dev-subdev.xml
index 4f0ba58c9bd9..f4bc27af83eb 100644
--- a/Documentation/DocBook/media/v4l/dev-subdev.xml
+++ b/Documentation/DocBook/media/v4l/dev-subdev.xml
@@ -1,11 +1,5 @@
<title>Sub-device Interface</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental">experimental</link>
- interface and may change in the future.</para>
- </note>
-
<para>The complex nature of V4L2 devices, where hardware is often made of
several integrated circuits that need to interact with each other in a
controlled way, leads to complex V4L2 drivers. The drivers usually reflect
diff --git a/Documentation/DocBook/media/v4l/io.xml b/Documentation/DocBook/media/v4l/io.xml
index 144158b3a5ac..e09025db92bd 100644
--- a/Documentation/DocBook/media/v4l/io.xml
+++ b/Documentation/DocBook/media/v4l/io.xml
@@ -475,12 +475,6 @@ rest should be evident.</para>
<section id="dmabuf">
<title>Streaming I/O (DMA buffer importing)</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental">experimental</link>
- interface and may change in the future.</para>
- </note>
-
<para>The DMABUF framework provides a generic method for sharing buffers
between multiple devices. Device drivers that support DMABUF can export a DMA
buffer to userspace as a file descriptor (known as the exporter role), import a
diff --git a/Documentation/DocBook/media/v4l/selection-api.xml b/Documentation/DocBook/media/v4l/selection-api.xml
index 28cbded766c9..b764cba150d1 100644
--- a/Documentation/DocBook/media/v4l/selection-api.xml
+++ b/Documentation/DocBook/media/v4l/selection-api.xml
@@ -1,13 +1,6 @@
<section id="selection-api">
- <title>Experimental API for cropping, composing and scaling</title>
-
- <note>
- <title>Experimental</title>
-
- <para>This is an <link linkend="experimental">experimental</link>
-interface and may change in the future.</para>
- </note>
+ <title>API for cropping, composing and scaling</title>
<section>
<title>Introduction</title>
diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index 4e73345e3eab..199c84e3aede 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -4002,12 +4002,6 @@ see <xref linkend="colorspaces" />.</entry>
<section id="v4l2-mbus-vendor-spec-fmts">
<title>Vendor and Device Specific Formats</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental">experimental</link>
-interface and may change in the future.</para>
- </note>
-
<para>This section lists complex data formats that are either vendor or
device specific.
</para>
diff --git a/Documentation/DocBook/media/v4l/vidioc-create-bufs.xml b/Documentation/DocBook/media/v4l/vidioc-create-bufs.xml
index d81fa0d4016b..6528e97b8990 100644
--- a/Documentation/DocBook/media/v4l/vidioc-create-bufs.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-create-bufs.xml
@@ -49,12 +49,6 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental"> experimental </link>
- interface and may change in the future.</para>
- </note>
-
<para>This ioctl is used to create buffers for <link linkend="mmap">memory
mapped</link> or <link linkend="userp">user pointer</link> or <link
linkend="dmabuf">DMA buffer</link> I/O. It can be used as an alternative or in
diff --git a/Documentation/DocBook/media/v4l/vidioc-dv-timings-cap.xml b/Documentation/DocBook/media/v4l/vidioc-dv-timings-cap.xml
index a2017bfcaed2..ca9ffce9b4c1 100644
--- a/Documentation/DocBook/media/v4l/vidioc-dv-timings-cap.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-dv-timings-cap.xml
@@ -49,14 +49,9 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental"> experimental </link>
- interface and may change in the future.</para>
- </note>
-
- <para>To query the capabilities of the DV receiver/transmitter applications
-can call the <constant>VIDIOC_DV_TIMINGS_CAP</constant> ioctl on a video node
+ <para>To query the capabilities of the DV receiver/transmitter, applications initialize the
+<structfield>pad</structfield> field to 0, zero the reserved array of &v4l2-dv-timings-cap;
+and call the <constant>VIDIOC_DV_TIMINGS_CAP</constant> ioctl on a video node
and the driver will fill in the structure. Note that drivers may return
different values after switching the video input or output.</para>
@@ -65,8 +60,8 @@ queried by calling the <constant>VIDIOC_SUBDEV_DV_TIMINGS_CAP</constant> ioctl
directly on a subdevice node. The capabilities are specific to inputs (for DV
receivers) or outputs (for DV transmitters), applications must specify the
desired pad number in the &v4l2-dv-timings-cap; <structfield>pad</structfield>
-field. Attempts to query capabilities on a pad that doesn't support them will
-return an &EINVAL;.</para>
+field and zero the <structfield>reserved</structfield> array. Attempts to query
+capabilities on a pad that doesn't support them will return an &EINVAL;.</para>
<table pgwide="1" frame="none" id="v4l2-bt-timings-cap">
<title>struct <structname>v4l2_bt_timings_cap</structname></title>
@@ -145,7 +140,8 @@ return an &EINVAL;.</para>
<row>
<entry>__u32</entry>
<entry><structfield>reserved</structfield>[2]</entry>
- <entry>Reserved for future extensions. Drivers must set the array to zero.</entry>
+ <entry>Reserved for future extensions. Drivers and applications must
+ set the array to zero.</entry>
</row>
<row>
<entry>union</entry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-enum-dv-timings.xml b/Documentation/DocBook/media/v4l/vidioc-enum-dv-timings.xml
index 6e3cadd4e1f9..9b3d42018b69 100644
--- a/Documentation/DocBook/media/v4l/vidioc-enum-dv-timings.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-enum-dv-timings.xml
@@ -49,20 +49,15 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental"> experimental </link>
- interface and may change in the future.</para>
- </note>
-
<para>While some DV receivers or transmitters support a wide range of timings, others
support only a limited number of timings. With this ioctl applications can enumerate a list
of known supported timings. Call &VIDIOC-DV-TIMINGS-CAP; to check if it also supports other
standards or even custom timings that are not in this list.</para>
<para>To query the available timings, applications initialize the
-<structfield>index</structfield> field and zero the reserved array of &v4l2-enum-dv-timings;
-and call the <constant>VIDIOC_ENUM_DV_TIMINGS</constant> ioctl on a video node with a
+<structfield>index</structfield> field, set the <structfield>pad</structfield> field to 0,
+zero the reserved array of &v4l2-enum-dv-timings; and call the
+<constant>VIDIOC_ENUM_DV_TIMINGS</constant> ioctl on a video node with a
pointer to this structure. Drivers fill the rest of the structure or return an
&EINVAL; when the index is out of bounds. To enumerate all supported DV timings,
applications shall begin at index zero, incrementing by one until the
diff --git a/Documentation/DocBook/media/v4l/vidioc-enum-freq-bands.xml b/Documentation/DocBook/media/v4l/vidioc-enum-freq-bands.xml
index 4e8ea65f7282..a0608abc1ab8 100644
--- a/Documentation/DocBook/media/v4l/vidioc-enum-freq-bands.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-enum-freq-bands.xml
@@ -49,12 +49,6 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental"> experimental </link>
- interface and may change in the future.</para>
- </note>
-
<para>Enumerates the frequency bands that a tuner or modulator supports.
To do this applications initialize the <structfield>tuner</structfield>,
<structfield>type</structfield> and <structfield>index</structfield> fields,
diff --git a/Documentation/DocBook/media/v4l/vidioc-expbuf.xml b/Documentation/DocBook/media/v4l/vidioc-expbuf.xml
index 0ae0b6a915d0..a6558a676ef3 100644
--- a/Documentation/DocBook/media/v4l/vidioc-expbuf.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-expbuf.xml
@@ -49,12 +49,6 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental"> experimental </link>
- interface and may change in the future.</para>
- </note>
-
<para>This ioctl is an extension to the <link linkend="mmap">memory
mapping</link> I/O method, therefore it is available only for
<constant>V4L2_MEMORY_MMAP</constant> buffers. It can be used to export a
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-edid.xml b/Documentation/DocBook/media/v4l/vidioc-g-edid.xml
index 2702536bbc7c..b7602d30f596 100644
--- a/Documentation/DocBook/media/v4l/vidioc-g-edid.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-g-edid.xml
@@ -1,6 +1,6 @@
<refentry id="vidioc-g-edid">
<refmeta>
- <refentrytitle>ioctl VIDIOC_G_EDID, VIDIOC_S_EDID</refentrytitle>
+ <refentrytitle>ioctl VIDIOC_G_EDID, VIDIOC_S_EDID, VIDIOC_SUBDEV_G_EDID, VIDIOC_SUBDEV_S_EDID</refentrytitle>
&manvol;
</refmeta>
@@ -71,7 +71,8 @@
<para>To get the EDID data the application has to fill in the <structfield>pad</structfield>,
<structfield>start_block</structfield>, <structfield>blocks</structfield> and <structfield>edid</structfield>
- fields and call <constant>VIDIOC_G_EDID</constant>. The current EDID from block
+ fields, zero the <structfield>reserved</structfield> array and call
+ <constant>VIDIOC_G_EDID</constant>. The current EDID from block
<structfield>start_block</structfield> and of size <structfield>blocks</structfield>
will be placed in the memory <structfield>edid</structfield> points to. The <structfield>edid</structfield>
pointer must point to memory at least <structfield>blocks</structfield>&nbsp;*&nbsp;128 bytes
@@ -92,8 +93,9 @@
the driver will set <structfield>blocks</structfield> to 0 and it returns 0.</para>
<para>To set the EDID blocks of a receiver the application has to fill in the <structfield>pad</structfield>,
- <structfield>blocks</structfield> and <structfield>edid</structfield> fields and set
- <structfield>start_block</structfield> to 0. It is not possible to set part of an EDID,
+ <structfield>blocks</structfield> and <structfield>edid</structfield> fields, set
+ <structfield>start_block</structfield> to 0 and zero the <structfield>reserved</structfield> array.
+ It is not possible to set part of an EDID,
it is always all or nothing. Setting the EDID data is only valid for receivers as it makes
no sense for a transmitter.</para>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-selection.xml b/Documentation/DocBook/media/v4l/vidioc-g-selection.xml
index a9c0d1dc209a..997f4e96f297 100644
--- a/Documentation/DocBook/media/v4l/vidioc-g-selection.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-g-selection.xml
@@ -50,12 +50,6 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental"> experimental </link>
- interface and may change in the future.</para>
- </note>
-
<para>The ioctls are used to query and configure selection rectangles.</para>
<para>To query the cropping (composing) rectangle set &v4l2-selection;
diff --git a/Documentation/DocBook/media/v4l/vidioc-prepare-buf.xml b/Documentation/DocBook/media/v4l/vidioc-prepare-buf.xml
index fa7ad7e33228..7bde698760e4 100644
--- a/Documentation/DocBook/media/v4l/vidioc-prepare-buf.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-prepare-buf.xml
@@ -48,12 +48,6 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental"> experimental </link>
- interface and may change in the future.</para>
- </note>
-
<para>Applications can optionally call the
<constant>VIDIOC_PREPARE_BUF</constant> ioctl to pass ownership of the buffer
to the driver before actually enqueuing it, using the
diff --git a/Documentation/DocBook/media/v4l/vidioc-query-dv-timings.xml b/Documentation/DocBook/media/v4l/vidioc-query-dv-timings.xml
index 0c93677d16b4..d41bf47ee5a2 100644
--- a/Documentation/DocBook/media/v4l/vidioc-query-dv-timings.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-query-dv-timings.xml
@@ -50,12 +50,6 @@ input</refpurpose>
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental"> experimental </link>
- interface and may change in the future.</para>
- </note>
-
<para>The hardware may be able to detect the current DV timings
automatically, similar to sensing the video standard. To do so, applications
call <constant>VIDIOC_QUERY_DV_TIMINGS</constant> with a pointer to a
diff --git a/Documentation/DocBook/media/v4l/vidioc-streamon.xml b/Documentation/DocBook/media/v4l/vidioc-streamon.xml
index df2c63d07bac..89fd7ce964f9 100644
--- a/Documentation/DocBook/media/v4l/vidioc-streamon.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-streamon.xml
@@ -123,6 +123,14 @@ synchronize with other events.</para>
</para>
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><errorcode>ENOLINK</errorcode></term>
+ <listitem>
+ <para>The driver implements the Media Controller interface and
+ the pipeline link configuration is invalid.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</refsect1>
</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-interval.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-interval.xml
index cff59f5cbf04..9d0251a27e5f 100644
--- a/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-interval.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-interval.xml
@@ -49,12 +49,6 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental">experimental</link>
- interface and may change in the future.</para>
- </note>
-
<para>This ioctl lets applications enumerate available frame intervals on a
given sub-device pad. Frame intervals only makes sense for sub-devices that
can control the frame period on their own. This includes, for instance,
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-size.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-size.xml
index abd545ede67a..9b91b8332ba9 100644
--- a/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-size.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-size.xml
@@ -49,12 +49,6 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental">experimental</link>
- interface and may change in the future.</para>
- </note>
-
<para>This ioctl allows applications to enumerate all frame sizes
supported by a sub-device on the given pad for the given media bus format.
Supported formats can be retrieved with the &VIDIOC-SUBDEV-ENUM-MBUS-CODE;
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-enum-mbus-code.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-enum-mbus-code.xml
index 0bcb278fd062..c67256ada87a 100644
--- a/Documentation/DocBook/media/v4l/vidioc-subdev-enum-mbus-code.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-enum-mbus-code.xml
@@ -49,12 +49,6 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental">experimental</link>
- interface and may change in the future.</para>
- </note>
-
<para>To enumerate media bus formats available at a given sub-device pad
applications initialize the <structfield>pad</structfield>, <structfield>which</structfield>
and <structfield>index</structfield> fields of &v4l2-subdev-mbus-code-enum; and
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-g-fmt.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-g-fmt.xml
index a67cde6f8c54..781089cba453 100644
--- a/Documentation/DocBook/media/v4l/vidioc-subdev-g-fmt.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-g-fmt.xml
@@ -50,12 +50,6 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental">experimental</link>
- interface and may change in the future.</para>
- </note>
-
<para>These ioctls are used to negotiate the frame format at specific
subdev pads in the image pipeline.</para>
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-g-frame-interval.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-g-frame-interval.xml
index 0bc3ea22d31f..848ec789ddaa 100644
--- a/Documentation/DocBook/media/v4l/vidioc-subdev-g-frame-interval.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-g-frame-interval.xml
@@ -50,12 +50,6 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental">experimental</link>
- interface and may change in the future.</para>
- </note>
-
<para>These ioctls are used to get and set the frame interval at specific
subdev pads in the image pipeline. The frame interval only makes sense for
sub-devices that can control the frame period on their own. This includes,
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-g-selection.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-g-selection.xml
index c62a7360719b..8346b2e4a703 100644
--- a/Documentation/DocBook/media/v4l/vidioc-subdev-g-selection.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-g-selection.xml
@@ -49,12 +49,6 @@
<refsect1>
<title>Description</title>
- <note>
- <title>Experimental</title>
- <para>This is an <link linkend="experimental">experimental</link>
- interface and may change in the future.</para>
- </note>
-
<para>The selections are used to configure various image
processing functionality performed by the subdevs which affect the
image size. This currently includes cropping, scaling and
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg
new file mode 100644
index 000000000000..727e270b11e4
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCU.svg
@@ -0,0 +1,474 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:28:20 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="9.1in"
+ height="8.9in"
+ viewBox="-66 -66 10932 10707"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="BigTreeClassicRCU.fig">
+ <metadata
+ id="metadata106">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs104">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3864"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="973"
+ inkscape:window-height="1137"
+ id="namedview102"
+ showgrid="false"
+ inkscape:zoom="0.9743589"
+ inkscape:cx="409.50003"
+ inkscape:cy="400.49997"
+ inkscape:window-x="915"
+ inkscape:window-y="24"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="0"
+ width="10800"
+ height="5625"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="1125"
+ y="3600"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="3825"
+ y="900"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="6525"
+ y="3600"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect12" />
+ <!-- Line -->
+ <polyline
+ points="3375,6525 3375,5046 "
+ style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline14" />
+ <!-- Arrowhead on XXXpoint 3375 6525 - 3375 4860-->
+ <!-- Circle -->
+ <circle
+ cx="7425"
+ cy="6075"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle18" />
+ <!-- Circle -->
+ <circle
+ cx="7875"
+ cy="6075"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle20" />
+ <!-- Circle -->
+ <circle
+ cx="8325"
+ cy="6075"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle22" />
+ <!-- Circle -->
+ <circle
+ cx="2025"
+ cy="6075"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle24" />
+ <!-- Circle -->
+ <circle
+ cx="2475"
+ cy="6075"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle26" />
+ <!-- Circle -->
+ <circle
+ cx="2925"
+ cy="6075"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle28" />
+ <!-- Circle -->
+ <circle
+ cx="4725"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle30" />
+ <!-- Circle -->
+ <circle
+ cx="5175"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle32" />
+ <!-- Circle -->
+ <circle
+ cx="5625"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle34" />
+ <!-- Line: box -->
+ <rect
+ x="2025"
+ y="6525"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect36" />
+ <!-- Line -->
+ <polyline
+ points="2475,3600 3975,2310 "
+ style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline38" />
+ <!-- Arrowhead on XXXpoint 2475 3600 - 4116 2190-->
+ <!-- Line -->
+ <polyline
+ points="7875,3600 6372,2310 "
+ style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline42" />
+ <!-- Arrowhead on XXXpoint 7875 3600 - 6231 2190-->
+ <!-- Line -->
+ <polyline
+ points="6975,8775 6975,5046 "
+ style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline46" />
+ <!-- Arrowhead on XXXpoint 6975 8775 - 6975 4860-->
+ <!-- Line -->
+ <polyline
+ points="1575,8775 1575,5046 "
+ style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline50" />
+ <!-- Arrowhead on XXXpoint 1575 8775 - 1575 4860-->
+ <!-- Line -->
+ <polyline
+ points="8775,6525 8775,5046 "
+ style="stroke:#00d1d1;stroke-width:44.9934641;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline54" />
+ <!-- Arrowhead on XXXpoint 8775 6525 - 8775 4860-->
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1575"
+ y="9225"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text58">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1575"
+ y="9675"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text60">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1575"
+ y="10350"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text62">CPU 0</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3375"
+ y="6975"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text64">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3375"
+ y="7425"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text66">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3375"
+ y="8100"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text68">CPU 15</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6975"
+ y="9225"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text70">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6975"
+ y="9675"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text72">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6975"
+ y="10350"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text74">CPU 1007</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8730"
+ y="6930"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text76">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8730"
+ y="7380"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text78">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8730"
+ y="8055"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text80">CPU 1023</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="start"
+ id="text82">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2475"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text84">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2475"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text86">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7875"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text88">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7875"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text90">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5175"
+ y="1350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text92">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5175"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text94">rcu_node</text>
+ <!-- Line: box -->
+ <rect
+ x="225"
+ y="8775"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect96" />
+ <!-- Line: box -->
+ <rect
+ x="5625"
+ y="8775"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect98" />
+ <!-- Line: box -->
+ <rect
+ x="7380"
+ y="6480"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect100" />
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg
new file mode 100644
index 000000000000..9bbb1944f962
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg
@@ -0,0 +1,499 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:26:09 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="5.7in"
+ height="6.6in"
+ viewBox="-44 -44 6838 7888"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="BigTreeClassicRCUBH.fig">
+ <metadata
+ id="metadata110">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs108">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3868"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow2Mend"
+ style="overflow:visible;">
+ <path
+ id="path3886"
+ style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+ d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+ transform="scale(0.6) rotate(180) translate(0,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="878"
+ inkscape:window-height="1148"
+ id="namedview106"
+ showgrid="false"
+ inkscape:zoom="1.3547758"
+ inkscape:cx="256.5"
+ inkscape:cy="297"
+ inkscape:window-x="45"
+ inkscape:window-y="24"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="450"
+ y="0"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="4950"
+ y="4950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="600"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="450"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="1050"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect14" />
+ <!-- Circle -->
+ <circle
+ cx="2850"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle16" />
+ <!-- Circle -->
+ <circle
+ cx="3150"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle18" />
+ <!-- Circle -->
+ <circle
+ cx="3450"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle20" />
+ <!-- Circle -->
+ <circle
+ cx="1350"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle22" />
+ <!-- Circle -->
+ <circle
+ cx="1650"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle24" />
+ <!-- Circle -->
+ <circle
+ cx="1950"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle26" />
+ <!-- Circle -->
+ <circle
+ cx="4350"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle28" />
+ <!-- Circle -->
+ <circle
+ cx="4650"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle30" />
+ <!-- Circle -->
+ <circle
+ cx="4950"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle32" />
+ <!-- Line -->
+ <polyline
+ points="1350,3450 2350,2590 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline34" />
+ <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
+ <!-- Line -->
+ <polyline
+ points="4950,3450 3948,2590 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline38" />
+ <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="3450"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect42" />
+ <!-- Line -->
+ <polyline
+ points="2250,5400 2250,4414 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline44" />
+ <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
+ <!-- Line: box -->
+ <rect
+ x="1500"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect48" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="6600"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect50" />
+ <!-- Line: box -->
+ <rect
+ x="3750"
+ y="3450"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect52" />
+ <!-- Line: box -->
+ <rect
+ x="4500"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect54" />
+ <!-- Line: box -->
+ <rect
+ x="3300"
+ y="6600"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect56" />
+ <!-- Line: box -->
+ <rect
+ x="2250"
+ y="1650"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect58" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6450"
+ y="300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text60">rcu_bh</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="1950"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text62">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2250"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text64">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="3750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text66">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text68">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text70">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="3750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text72">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="5700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text74">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6000"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text76">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="6900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text78">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text80">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="5700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text82">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6000"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text84">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="6900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text86">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text88">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="1350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text90">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6000"
+ y="750"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text92">rcu_sched</text>
+ <!-- Line -->
+ <polyline
+ points="5250,5400 5250,4414 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline94" />
+ <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
+ <!-- Line -->
+ <polyline
+ points="4050,6600 4050,4414 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline98" />
+ <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
+ <!-- Line -->
+ <polyline
+ points="1050,6600 1050,4414 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline102" />
+ <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg
new file mode 100644
index 000000000000..21ba7823479d
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg
@@ -0,0 +1,695 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:20:02 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="5.7in"
+ height="8.6in"
+ viewBox="-44 -44 6838 10288"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="BigTreeClassicRCUBHdyntick.fig">
+ <metadata
+ id="metadata166">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs164">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3924"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Lend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow2Lend"
+ style="overflow:visible;">
+ <path
+ id="path3936"
+ style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+ d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+ transform="scale(1.1) rotate(180) translate(1,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="845"
+ inkscape:window-height="988"
+ id="namedview162"
+ showgrid="false"
+ inkscape:zoom="1.0452196"
+ inkscape:cx="256.5"
+ inkscape:cy="387.00003"
+ inkscape:window-x="356"
+ inkscape:window-y="61"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="450"
+ y="0"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="4950"
+ y="4950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="600"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect10" />
+ <!-- Line -->
+ <polyline
+ points="5250,8100 5688,5912 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline12" />
+ <!-- Arrowhead on XXXpoint 5250 8100 - 5710 5790-->
+ <polyline
+ points="5714 6068 5704 5822 5598 6044 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline14" />
+ <!-- Line -->
+ <polyline
+ points="4050,9300 4486,7262 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline16" />
+ <!-- Arrowhead on XXXpoint 4050 9300 - 4512 7140-->
+ <polyline
+ points="4514 7418 4506 7172 4396 7394 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline18" />
+ <!-- Line -->
+ <polyline
+ points="1040,9300 1476,7262 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline20" />
+ <!-- Arrowhead on XXXpoint 1040 9300 - 1502 7140-->
+ <polyline
+ points="1504 7418 1496 7172 1386 7394 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline22" />
+ <!-- Line -->
+ <polyline
+ points="2240,8100 2676,6062 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline24" />
+ <!-- Arrowhead on XXXpoint 2240 8100 - 2702 5940-->
+ <polyline
+ points="2704 6218 2696 5972 2586 6194 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline26" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="450"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect28" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="1050"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect30" />
+ <!-- Line -->
+ <polyline
+ points="1350,3450 2350,2590 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline32" />
+ <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
+ <!-- Line -->
+ <polyline
+ points="4950,3450 3948,2590 "
+ style="stroke:#00d1d1;stroke-width:30.0045575;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline36" />
+ <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
+ <!-- Line -->
+ <polyline
+ points="4050,6600 4050,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline40" />
+ <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
+ <!-- Line -->
+ <polyline
+ points="1050,6600 1050,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline44" />
+ <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
+ <!-- Line -->
+ <polyline
+ points="2250,5400 2250,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline48" />
+ <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
+ <!-- Line -->
+ <polyline
+ points="2250,8100 2250,6364 "
+ style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline52" />
+ <!-- Arrowhead on XXXpoint 2250 8100 - 2250 6240-->
+ <!-- Line -->
+ <polyline
+ points="1050,9300 1050,7564 "
+ style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline56" />
+ <!-- Arrowhead on XXXpoint 1050 9300 - 1050 7440-->
+ <!-- Line -->
+ <polyline
+ points="4050,9300 4050,7564 "
+ style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline60" />
+ <!-- Arrowhead on XXXpoint 4050 9300 - 4050 7440-->
+ <!-- Line -->
+ <polyline
+ points="5250,8100 5250,6364 "
+ style="stroke:#00ff00;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline64" />
+ <!-- Arrowhead on XXXpoint 5250 8100 - 5250 6240-->
+ <!-- Circle -->
+ <circle
+ cx="2850"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle68" />
+ <!-- Circle -->
+ <circle
+ cx="3150"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle70" />
+ <!-- Circle -->
+ <circle
+ cx="3450"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle72" />
+ <!-- Circle -->
+ <circle
+ cx="1350"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle74" />
+ <!-- Circle -->
+ <circle
+ cx="1650"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle76" />
+ <!-- Circle -->
+ <circle
+ cx="1950"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle78" />
+ <!-- Circle -->
+ <circle
+ cx="4350"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle80" />
+ <!-- Circle -->
+ <circle
+ cx="4650"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle82" />
+ <!-- Circle -->
+ <circle
+ cx="4950"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle84" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="3450"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect86" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="6600"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect88" />
+ <!-- Line: box -->
+ <rect
+ x="3750"
+ y="3450"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect90" />
+ <!-- Line: box -->
+ <rect
+ x="4500"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect92" />
+ <!-- Line: box -->
+ <rect
+ x="3300"
+ y="6600"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect94" />
+ <!-- Line: box -->
+ <rect
+ x="2250"
+ y="1650"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect96" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="9300"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect98" />
+ <!-- Line: box -->
+ <rect
+ x="1350"
+ y="8100"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect100" />
+ <!-- Line: box -->
+ <rect
+ x="3000"
+ y="9300"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect102" />
+ <!-- Line: box -->
+ <rect
+ x="4350"
+ y="8100"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect104" />
+ <!-- Line: box -->
+ <rect
+ x="1500"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect106" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6450"
+ y="300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text108">rcu_bh</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="1950"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text110">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2250"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text112">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="3750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text114">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text116">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text118">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="3750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text120">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="5700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text122">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6000"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text124">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="6900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text126">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text128">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="5700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text130">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6000"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text132">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="6900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text134">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text136">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="1350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text138">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="9600"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text140">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="9900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text142">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="9600"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text144">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="9900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text146">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="8400"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text148">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="8700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text150">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="8400"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text152">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="8700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text154">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6000"
+ y="750"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text156">rcu_sched</text>
+ <!-- Line -->
+ <polyline
+ points="5250,5400 5250,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00455750000000066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline158" />
+ <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg
new file mode 100644
index 000000000000..15adcac036c7
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg
@@ -0,0 +1,741 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:32:59 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="6.1in"
+ height="8.9in"
+ viewBox="-44 -44 7288 10738"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="BigTreePreemptRCUBHdyntick.fig">
+ <metadata
+ id="metadata182">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs180">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3940"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="874"
+ inkscape:window-height="1148"
+ id="namedview178"
+ showgrid="false"
+ inkscape:zoom="1.2097379"
+ inkscape:cx="274.5"
+ inkscape:cy="400.49997"
+ inkscape:window-x="946"
+ inkscape:window-y="24"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="900"
+ y="0"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="1200"
+ y="600"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="5400"
+ y="4950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="450"
+ y="450"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="1050"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect14" />
+ <!-- Line: box -->
+ <rect
+ x="4950"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect16" />
+ <!-- Line -->
+ <polyline
+ points="5250,8550 5688,6362 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline18" />
+ <!-- Arrowhead on XXXpoint 5250 8550 - 5710 6240-->
+ <polyline
+ points="5714 6518 5704 6272 5598 6494 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline20" />
+ <!-- Line -->
+ <polyline
+ points="4050,9750 4486,7712 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline22" />
+ <!-- Arrowhead on XXXpoint 4050 9750 - 4512 7590-->
+ <polyline
+ points="4514 7868 4506 7622 4396 7844 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline24" />
+ <!-- Line -->
+ <polyline
+ points="1040,9750 1476,7712 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline26" />
+ <!-- Arrowhead on XXXpoint 1040 9750 - 1502 7590-->
+ <polyline
+ points="1504 7868 1496 7622 1386 7844 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline28" />
+ <!-- Line -->
+ <polyline
+ points="2240,8550 2676,6512 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline30" />
+ <!-- Arrowhead on XXXpoint 2240 8550 - 2702 6390-->
+ <polyline
+ points="2704 6668 2696 6422 2586 6644 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline32" />
+ <!-- Line -->
+ <polyline
+ points="4050,9750 5682,6360 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline34" />
+ <!-- Arrowhead on XXXpoint 4050 9750 - 5736 6246-->
+ <polyline
+ points="5672 6518 5722 6276 5562 6466 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline36" />
+ <!-- Line -->
+ <polyline
+ points="1010,9750 2642,6360 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline38" />
+ <!-- Arrowhead on XXXpoint 1010 9750 - 2696 6246-->
+ <polyline
+ points="2632 6518 2682 6276 2522 6466 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline40" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="900"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect42" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="1500"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect44" />
+ <!-- Line -->
+ <polyline
+ points="1350,3900 2350,3040 "
+ style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline46" />
+ <!-- Arrowhead on XXXpoint 1350 3900 - 2444 2960-->
+ <!-- Line -->
+ <polyline
+ points="4950,3900 3948,3040 "
+ style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline50" />
+ <!-- Arrowhead on XXXpoint 4950 3900 - 3854 2960-->
+ <!-- Line -->
+ <polyline
+ points="4050,7050 4050,4864 "
+ style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline54" />
+ <!-- Arrowhead on XXXpoint 4050 7050 - 4050 4740-->
+ <!-- Line -->
+ <polyline
+ points="1050,7050 1050,4864 "
+ style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline58" />
+ <!-- Arrowhead on XXXpoint 1050 7050 - 1050 4740-->
+ <!-- Line -->
+ <polyline
+ points="2250,5850 2250,4864 "
+ style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline62" />
+ <!-- Arrowhead on XXXpoint 2250 5850 - 2250 4740-->
+ <!-- Line -->
+ <polyline
+ points="2250,8550 2250,6814 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline66" />
+ <!-- Arrowhead on XXXpoint 2250 8550 - 2250 6690-->
+ <!-- Line -->
+ <polyline
+ points="1050,9750 1050,8014 "
+ style="stroke:#00ff00;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline70" />
+ <!-- Arrowhead on XXXpoint 1050 9750 - 1050 7890-->
+ <!-- Line -->
+ <polyline
+ points="4050,9750 4050,8014 "
+ style="stroke:#00ff00;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline74" />
+ <!-- Arrowhead on XXXpoint 4050 9750 - 4050 7890-->
+ <!-- Line -->
+ <polyline
+ points="5250,8550 5250,6814 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline78" />
+ <!-- Arrowhead on XXXpoint 5250 8550 - 5250 6690-->
+ <!-- Circle -->
+ <circle
+ cx="2850"
+ cy="4350"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle82" />
+ <!-- Circle -->
+ <circle
+ cx="3150"
+ cy="4350"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle84" />
+ <!-- Circle -->
+ <circle
+ cx="3450"
+ cy="4350"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle86" />
+ <!-- Circle -->
+ <circle
+ cx="1350"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle88" />
+ <!-- Circle -->
+ <circle
+ cx="1650"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle90" />
+ <!-- Circle -->
+ <circle
+ cx="1950"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle92" />
+ <!-- Circle -->
+ <circle
+ cx="4350"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle94" />
+ <!-- Circle -->
+ <circle
+ cx="4650"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle96" />
+ <!-- Circle -->
+ <circle
+ cx="4950"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle98" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="3900"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect100" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="7050"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect102" />
+ <!-- Line: box -->
+ <rect
+ x="3750"
+ y="3900"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect104" />
+ <!-- Line: box -->
+ <rect
+ x="4500"
+ y="5850"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect106" />
+ <!-- Line: box -->
+ <rect
+ x="3300"
+ y="7050"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect108" />
+ <!-- Line: box -->
+ <rect
+ x="2250"
+ y="2100"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect110" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="9750"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect112" />
+ <!-- Line: box -->
+ <rect
+ x="1350"
+ y="8550"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect114" />
+ <!-- Line: box -->
+ <rect
+ x="3000"
+ y="9750"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect116" />
+ <!-- Line: box -->
+ <rect
+ x="4350"
+ y="8550"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect118" />
+ <!-- Line: box -->
+ <rect
+ x="1500"
+ y="5850"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect120" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6450"
+ y="750"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text122">rcu_bh</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2400"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text124">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text126">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text128">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text130">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text132">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text134">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text136">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text138">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text140">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7650"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text142">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text144">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text146">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text148">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7650"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text150">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text152">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="10050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text154">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="10350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text156">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="10050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text158">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="10350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text160">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="8850"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text162">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="9150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text164">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="8850"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text166">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="9150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text168">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6900"
+ y="300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text170">rcu_preempt</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6000"
+ y="1200"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text172">rcu_sched</text>
+ <!-- Line -->
+ <polyline
+ points="5250,5850 5250,4864 "
+ style="stroke:#00d1d1;stroke-width:30.00205472;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline174" />
+ <!-- Arrowhead on XXXpoint 5250 5850 - 5250 4740-->
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg
new file mode 100644
index 000000000000..bbc3801470d0
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg
@@ -0,0 +1,858 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:29:48 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="7.4in"
+ height="9.9in"
+ viewBox="-44 -44 8938 11938"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="BigTreePreemptRCUBHdyntickCB.svg">
+ <metadata
+ id="metadata212">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs210">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3970"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="881"
+ inkscape:window-height="1128"
+ id="namedview208"
+ showgrid="false"
+ inkscape:zoom="1.0195195"
+ inkscape:cx="333"
+ inkscape:cy="445.49997"
+ inkscape:window-x="936"
+ inkscape:window-y="24"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="900"
+ y="0"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="1200"
+ y="600"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="5400"
+ y="4950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="450"
+ y="450"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="1050"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect14" />
+ <!-- Line: box -->
+ <rect
+ x="4950"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect16" />
+ <!-- Line -->
+ <polyline
+ points="5250,8550 5688,6362 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline18" />
+ <!-- Arrowhead on XXXpoint 5250 8550 - 5710 6240-->
+ <polyline
+ points="5714 6518 5704 6272 5598 6494 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline20" />
+ <!-- Line -->
+ <polyline
+ points="4050,9750 4486,7712 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline22" />
+ <!-- Arrowhead on XXXpoint 4050 9750 - 4512 7590-->
+ <polyline
+ points="4514 7868 4506 7622 4396 7844 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline24" />
+ <!-- Line -->
+ <polyline
+ points="1040,9750 1476,7712 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline26" />
+ <!-- Arrowhead on XXXpoint 1040 9750 - 1502 7590-->
+ <polyline
+ points="1504 7868 1496 7622 1386 7844 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline28" />
+ <!-- Line -->
+ <polyline
+ points="2240,8550 2676,6512 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline30" />
+ <!-- Arrowhead on XXXpoint 2240 8550 - 2702 6390-->
+ <polyline
+ points="2704 6668 2696 6422 2586 6644 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline32" />
+ <!-- Line -->
+ <polyline
+ points="4050,9600 5692,6062 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline34" />
+ <!-- Arrowhead on XXXpoint 4050 9600 - 5744 5948-->
+ <polyline
+ points="5682 6220 5730 5978 5574 6170 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline36" />
+ <!-- Line -->
+ <polyline
+ points="1086,9600 2728,6062 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline38" />
+ <!-- Arrowhead on XXXpoint 1086 9600 - 2780 5948-->
+ <polyline
+ points="2718 6220 2766 5978 2610 6170 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline40" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="900"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect42" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="1500"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect44" />
+ <!-- Line -->
+ <polyline
+ points="1350,3900 2350,3040 "
+ style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline46" />
+ <!-- Arrowhead on XXXpoint 1350 3900 - 2444 2960-->
+ <!-- Line -->
+ <polyline
+ points="4950,3900 3948,3040 "
+ style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline50" />
+ <!-- Arrowhead on XXXpoint 4950 3900 - 3854 2960-->
+ <!-- Line -->
+ <polyline
+ points="4050,7050 4050,4864 "
+ style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline54" />
+ <!-- Arrowhead on XXXpoint 4050 7050 - 4050 4740-->
+ <!-- Line -->
+ <polyline
+ points="1050,7050 1050,4864 "
+ style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline58" />
+ <!-- Arrowhead on XXXpoint 1050 7050 - 1050 4740-->
+ <!-- Line -->
+ <polyline
+ points="2250,5850 2250,4864 "
+ style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline62" />
+ <!-- Arrowhead on XXXpoint 2250 5850 - 2250 4740-->
+ <!-- Line -->
+ <polyline
+ points="2250,8550 2250,6814 "
+ style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline66" />
+ <!-- Arrowhead on XXXpoint 2250 8550 - 2250 6690-->
+ <!-- Line -->
+ <polyline
+ points="1050,9750 1050,8014 "
+ style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline70" />
+ <!-- Arrowhead on XXXpoint 1050 9750 - 1050 7890-->
+ <!-- Line -->
+ <polyline
+ points="4050,9750 4050,8014 "
+ style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline74" />
+ <!-- Arrowhead on XXXpoint 4050 9750 - 4050 7890-->
+ <!-- Line -->
+ <polyline
+ points="5250,8550 5250,6814 "
+ style="stroke:#00ff00;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline78" />
+ <!-- Arrowhead on XXXpoint 5250 8550 - 5250 6690-->
+ <!-- Line -->
+ <polyline
+ points="6000,6300 8048,7910 "
+ style="stroke:#87cfff;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline82" />
+ <!-- Arrowhead on XXXpoint 6000 6300 - 8146 7986-->
+ <!-- Circle -->
+ <circle
+ cx="2850"
+ cy="4350"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle86" />
+ <!-- Circle -->
+ <circle
+ cx="3150"
+ cy="4350"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle88" />
+ <!-- Circle -->
+ <circle
+ cx="3450"
+ cy="4350"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle90" />
+ <!-- Circle -->
+ <circle
+ cx="1350"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle92" />
+ <!-- Circle -->
+ <circle
+ cx="1650"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle94" />
+ <!-- Circle -->
+ <circle
+ cx="1950"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle96" />
+ <!-- Circle -->
+ <circle
+ cx="4350"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle98" />
+ <!-- Circle -->
+ <circle
+ cx="4650"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle100" />
+ <!-- Circle -->
+ <circle
+ cx="4950"
+ cy="5550"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle102" />
+ <!-- Line: box -->
+ <rect
+ x="7350"
+ y="7950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect104" />
+ <!-- Line: box -->
+ <rect
+ x="7350"
+ y="9450"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect106" />
+ <!-- Line -->
+ <polyline
+ points="8100,8850 8100,9384 "
+ style="stroke:#000000;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline108" />
+ <!-- Arrowhead on XXXpoint 8100 8850 - 8100 9510-->
+ <!-- Line: box -->
+ <rect
+ x="7350"
+ y="10950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect112" />
+ <!-- Line -->
+ <polyline
+ points="8100,10350 8100,10884 "
+ style="stroke:#000000;stroke-width:30;stroke-linejoin:miter;stroke-linecap:butt;marker-end:url(#Arrow1Mend)"
+ id="polyline114" />
+ <!-- Arrowhead on XXXpoint 8100 10350 - 8100 11010-->
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="3900"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect118" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="7050"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect120" />
+ <!-- Line: box -->
+ <rect
+ x="3750"
+ y="3900"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect122" />
+ <!-- Line: box -->
+ <rect
+ x="4500"
+ y="5850"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect124" />
+ <!-- Line: box -->
+ <rect
+ x="3300"
+ y="7050"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect126" />
+ <!-- Line: box -->
+ <rect
+ x="2250"
+ y="2100"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect128" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="9750"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect130" />
+ <!-- Line: box -->
+ <rect
+ x="1350"
+ y="8550"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect132" />
+ <!-- Line: box -->
+ <rect
+ x="3000"
+ y="9750"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect134" />
+ <!-- Line: box -->
+ <rect
+ x="4350"
+ y="8550"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect136" />
+ <!-- Line: box -->
+ <rect
+ x="1500"
+ y="5850"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect138" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8100"
+ y="8250"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text140">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8100"
+ y="8550"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text142">rcu_head</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8100"
+ y="9750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text144">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8100"
+ y="10050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text146">rcu_head</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8100"
+ y="11250"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text148">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8100"
+ y="11550"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text150">rcu_head</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6000"
+ y="1200"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text152">rcu_sched</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6450"
+ y="750"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text154">rcu_bh</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2400"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text156">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text158">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text160">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text162">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text164">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text166">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text168">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text170">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text172">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7650"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text174">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text176">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text178">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text180">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7650"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text182">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text184">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="10050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text186">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="10350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text188">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="10050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text190">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="10350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text192">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="8850"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text194">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="9150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text196">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="8850"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text198">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="9150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text200">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6900"
+ y="300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text202">rcu_preempt</text>
+ <!-- Line -->
+ <polyline
+ points="5250,5850 5250,4864 "
+ style="stroke:#00d1d1;stroke-width:29.99463964;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline204" />
+ <!-- Arrowhead on XXXpoint 5250 5850 - 5250 4740-->
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
new file mode 100644
index 000000000000..7eb47ac25ad7
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html
@@ -0,0 +1,1333 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd">
+ <html>
+ <head><title>A Tour Through TREE_RCU's Data Structures [LWN.net]</title>
+ <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
+
+ <p>January 27, 2016</p>
+ <p>This article was contributed by Paul E.&nbsp;McKenney</p>
+
+<h3>Introduction</h3>
+
+This document describes RCU's major data structures and their relationship
+to each other.
+
+<ol>
+<li> <a href="#Data-Structure Relationships">
+ Data-Structure Relationships</a>
+<li> <a href="#The rcu_state Structure">
+ The <tt>rcu_state</tt> Structure</a>
+<li> <a href="#The rcu_node Structure">
+ The <tt>rcu_node</tt> Structure</a>
+<li> <a href="#The rcu_data Structure">
+ The <tt>rcu_data</tt> Structure</a>
+<li> <a href="#The rcu_dynticks Structure">
+ The <tt>rcu_dynticks</tt> Structure</a>
+<li> <a href="#The rcu_head Structure">
+ The <tt>rcu_head</tt> Structure</a>
+<li> <a href="#RCU-Specific Fields in the task_struct Structure">
+ RCU-Specific Fields in the <tt>task_struct</tt> Structure</a>
+<li> <a href="#Accessor Functions">
+ Accessor Functions</a>
+</ol>
+
+At the end we have the
+<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
+
+<h3><a name="Data-Structure Relationships">Data-Structure Relationships</a></h3>
+
+<p>RCU is for all intents and purposes a large state machine, and its
+data structures maintain the state in such a way as to allow RCU readers
+to execute extremely quickly, while also processing the RCU grace periods
+requested by updaters in an efficient and extremely scalable fashion.
+The efficiency and scalability of RCU updaters is provided primarily
+by a combining tree, as shown below:
+
+</p><p><img src="BigTreeClassicRCU.svg" alt="BigTreeClassicRCU.svg" width="30%">
+
+</p><p>This diagram shows an enclosing <tt>rcu_state</tt> structure
+containing a tree of <tt>rcu_node</tt> structures.
+Each leaf node of the <tt>rcu_node</tt> tree has up to 16
+<tt>rcu_data</tt> structures associated with it, so that there
+are <tt>NR_CPUS</tt> number of <tt>rcu_data</tt> structures,
+one for each possible CPU.
+This structure is adjusted at boot time, if needed, to handle the
+common case where <tt>nr_cpu_ids</tt> is much less than
+<tt>NR_CPUS</tt>.
+For example, a number of Linux distributions set <tt>NR_CPUS=4096</tt>,
+which results in a three-level <tt>rcu_node</tt> tree.
+If the actual hardware has only 16 CPUs, RCU will adjust itself
+at boot time, resulting in an <tt>rcu_node</tt> tree with only a single node.
+
+</p><p>The purpose of this combining tree is to allow per-CPU events
+such as quiescent states, dyntick-idle transitions,
+and CPU hotplug operations to be processed efficiently
+and scalably.
+Quiescent states are recorded by the per-CPU <tt>rcu_data</tt> structures,
+and other events are recorded by the leaf-level <tt>rcu_node</tt>
+structures.
+All of these events are combined at each level of the tree until finally
+grace periods are completed at the tree's root <tt>rcu_node</tt>
+structure.
+A grace period can be completed at the root once every CPU
+(or, in the case of <tt>CONFIG_PREEMPT_RCU</tt>, task)
+has passed through a quiescent state.
+Once a grace period has completed, record of that fact is propagated
+back down the tree.
+
+</p><p>As can be seen from the diagram, on a 64-bit system
+a two-level tree with 64 leaves can accommodate 1,024 CPUs, with a fanout
+of 64 at the root and a fanout of 16 at the leaves.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Why isn't the fanout at the leaves also 64?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Because there are more types of events that affect the leaf-level
+ <tt>rcu_node</tt> structures than further up the tree.
+ Therefore, if the leaf <tt>rcu_node</tt> structures have fanout of
+ 64, the contention on these structures' <tt>-&gt;lock</tt>
+ becomes excessive.
+ Experimentation on a wide variety of systems has shown that a fanout
+ of 16 works well for the leaves of the <tt>rcu_node</tt> tree.
+ </font>
+
+ <p><font color="ffffff">Of course, further experience with
+ systems having hundreds or thousands of CPUs may demonstrate
+ that the fanout for the non-leaf <tt>rcu_node</tt> structures
+ must also be reduced.
+ Such reduction can be easily carried out when and if it proves
+ necessary.
+ In the meantime, if you are using such a system and running into
+ contention problems on the non-leaf <tt>rcu_node</tt> structures,
+ you may use the <tt>CONFIG_RCU_FANOUT</tt> kernel configuration
+ parameter to reduce the non-leaf fanout as needed.
+ </font>
+
+ <p><font color="ffffff">Kernels built for systems with
+ strong NUMA characteristics might also need to adjust
+ <tt>CONFIG_RCU_FANOUT</tt> so that the domains of the
+ <tt>rcu_node</tt> structures align with hardware boundaries.
+ However, there has thus far been no need for this.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>If your system has more than 1,024 CPUs (or more than 512 CPUs on
+a 32-bit system), then RCU will automatically add more levels to the
+tree.
+For example, if you are crazy enough to build a 64-bit system with 65,536
+CPUs, RCU would configure the <tt>rcu_node</tt> tree as follows:
+
+</p><p><img src="HugeTreeClassicRCU.svg" alt="HugeTreeClassicRCU.svg" width="50%">
+
+</p><p>RCU currently permits up to a four-level tree, which on a 64-bit system
+accommodates up to 4,194,304 CPUs, though only a mere 524,288 CPUs for
+32-bit systems.
+On the other hand, you can set <tt>CONFIG_RCU_FANOUT</tt> to be
+as small as 2 if you wish, which would permit only 16 CPUs, which
+is useful for testing.
+
+</p><p>This multi-level combining tree allows us to get most of the
+performance and scalability
+benefits of partitioning, even though RCU grace-period detection is
+inherently a global operation.
+The trick here is that only the last CPU to report a quiescent state
+into a given <tt>rcu_node</tt> structure need advance to the <tt>rcu_node</tt>
+structure at the next level up the tree.
+This means that at the leaf-level <tt>rcu_node</tt> structure, only
+one access out of sixteen will progress up the tree.
+For the internal <tt>rcu_node</tt> structures, the situation is even
+more extreme: Only one access out of sixty-four will progress up
+the tree.
+Because the vast majority of the CPUs do not progress up the tree,
+the lock contention remains roughly constant up the tree.
+No matter how many CPUs there are in the system, at most 64 quiescent-state
+reports per grace period will progress all the way to the root
+<tt>rcu_node</tt> structure, thus ensuring that the lock contention
+on that root <tt>rcu_node</tt> structure remains acceptably low.
+
+</p><p>In effect, the combining tree acts like a big shock absorber,
+keeping lock contention under control at all tree levels regardless
+of the level of loading on the system.
+
+</p><p>The Linux kernel actually supports multiple flavors of RCU
+running concurrently, so RCU builds separate data structures for each
+flavor.
+For example, for <tt>CONFIG_TREE_RCU=y</tt> kernels, RCU provides
+rcu_sched and rcu_bh, as shown below:
+
+</p><p><img src="BigTreeClassicRCUBH.svg" alt="BigTreeClassicRCUBH.svg" width="33%">
+
+</p><p>Energy efficiency is increasingly important, and for that
+reason the Linux kernel provides <tt>CONFIG_NO_HZ_IDLE</tt>, which
+turns off the scheduling-clock interrupts on idle CPUs, which in
+turn allows those CPUs to attain deeper sleep states and to consume
+less energy.
+CPUs whose scheduling-clock interrupts have been turned off are
+said to be in <i>dyntick-idle mode</i>.
+RCU must handle dyntick-idle CPUs specially
+because RCU would otherwise wake up each CPU on every grace period,
+which would defeat the whole purpose of <tt>CONFIG_NO_HZ_IDLE</tt>.
+RCU uses the <tt>rcu_dynticks</tt> structure to track
+which CPUs are in dyntick-idle mode, as shown below:
+
+</p><p><img src="BigTreeClassicRCUBHdyntick.svg" alt="BigTreeClassicRCUBHdyntick.svg" width="33%">
+
+</p><p>However, if a CPU is in dyntick-idle mode, it is in that mode
+for all flavors of RCU.
+Therefore, a single <tt>rcu_dynticks</tt> structure is allocated per
+CPU, and all of a given CPU's <tt>rcu_data</tt> structures share
+that <tt>rcu_dynticks</tt>, as shown in the figure.
+
+</p><p>Kernels built with <tt>CONFIG_PREEMPT_RCU</tt> support
+rcu_preempt in addition to rcu_sched and rcu_bh, as shown below:
+
+</p><p><img src="BigTreePreemptRCUBHdyntick.svg" alt="BigTreePreemptRCUBHdyntick.svg" width="35%">
+
+</p><p>RCU updaters wait for normal grace periods by registering
+RCU callbacks, either directly via <tt>call_rcu()</tt> and
+friends (namely <tt>call_rcu_bh()</tt> and <tt>call_rcu_sched()</tt>,
+there being a separate interface per flavor of RCU)
+or indirectly via <tt>synchronize_rcu()</tt> and friends.
+RCU callbacks are represented by <tt>rcu_head</tt> structures,
+which are queued on <tt>rcu_data</tt> structures while they are
+waiting for a grace period to elapse, as shown in the following figure:
+
+</p><p><img src="BigTreePreemptRCUBHdyntickCB.svg" alt="BigTreePreemptRCUBHdyntickCB.svg" width="40%">
+
+</p><p>This figure shows how <tt>TREE_RCU</tt>'s and
+<tt>PREEMPT_RCU</tt>'s major data structures are related.
+Lesser data structures will be introduced with the algorithms that
+make use of them.
+
+</p><p>Note that each of the data structures in the above figure has
+its own synchronization:
+
+<p><ol>
+<li> Each <tt>rcu_state</tt> structure has a lock and a mutex,
+ and some fields are protected by the corresponding root
+ <tt>rcu_node</tt> structure's lock.
+<li> Each <tt>rcu_node</tt> structure has a spinlock.
+<li> The fields in <tt>rcu_data</tt> are private to the corresponding
+ CPU, although a few can be read and written by other CPUs.
+<li> Similarly, the fields in <tt>rcu_dynticks</tt> are private
+ to the corresponding CPU, although a few can be read by
+ other CPUs.
+</ol>
+
+<p>It is important to note that different data structures can have
+very different ideas about the state of RCU at any given time.
+For but one example, awareness of the start or end of a given RCU
+grace period propagates slowly through the data structures.
+This slow propagation is absolutely necessary for RCU to have good
+read-side performance.
+If this balkanized implementation seems foreign to you, one useful
+trick is to consider each instance of these data structures to be
+a different person, each having the usual slightly different
+view of reality.
+
+</p><p>The general role of each of these data structures is as
+follows:
+
+</p><ol>
+<li> <tt>rcu_state</tt>:
+ This structure forms the interconnection between the
+ <tt>rcu_node</tt> and <tt>rcu_data</tt> structures,
+ tracks grace periods, serves as short-term repository
+ for callbacks orphaned by CPU-hotplug events,
+ maintains <tt>rcu_barrier()</tt> state,
+ tracks expedited grace-period state,
+ and maintains state used to force quiescent states when
+ grace periods extend too long.
+<li> <tt>rcu_node</tt>: This structure forms the combining
+ tree that propagates quiescent-state
+ information from the leaves to the root, and also propagates
+ grace-period information from the root to the leaves.
+ It provides local copies of the grace-period state in order
+ to allow this information to be accessed in a synchronized
+ manner without suffering the scalability limitations that
+ would otherwise be imposed by global locking.
+ In <tt>CONFIG_PREEMPT_RCU</tt> kernels, it manages the lists
+ of tasks that have blocked while in their current
+ RCU read-side critical section.
+ In <tt>CONFIG_PREEMPT_RCU</tt> with
+ <tt>CONFIG_RCU_BOOST</tt>, it manages the
+ per-<tt>rcu_node</tt> priority-boosting
+ kernel threads (kthreads) and state.
+ Finally, it records CPU-hotplug state in order to determine
+ which CPUs should be ignored during a given grace period.
+<li> <tt>rcu_data</tt>: This per-CPU structure is the
+ focus of quiescent-state detection and RCU callback queuing.
+ It also tracks its relationship to the corresponding leaf
+ <tt>rcu_node</tt> structure to allow more-efficient
+ propagation of quiescent states up the <tt>rcu_node</tt>
+ combining tree.
+ Like the <tt>rcu_node</tt> structure, it provides a local
+ copy of the grace-period information to allow for-free
+ synchronized
+ access to this information from the corresponding CPU.
+ Finally, this structure records past dyntick-idle state
+ for the corresponding CPU and also tracks statistics.
+<li> <tt>rcu_dynticks</tt>:
+ This per-CPU structure tracks the current dyntick-idle
+ state for the corresponding CPU.
+ Unlike the other three structures, the <tt>rcu_dynticks</tt>
+ structure is not replicated per RCU flavor.
+<li> <tt>rcu_head</tt>:
+ This structure represents RCU callbacks, and is the
+ only structure allocated and managed by RCU users.
+ The <tt>rcu_head</tt> structure is normally embedded
+ within the RCU-protected data structure.
+</ol>
+
+<p>If all you wanted from this article was a general notion of how
+RCU's data structures are related, you are done.
+Otherwise, each of the following sections gives more details on
+the <tt>rcu_state</tt>, <tt>rcu_node</tt>, <tt>rcu_data</tt>,
+and <tt>rcu_dynticks</tt> data structures.
+
+<h3><a name="The rcu_state Structure">
+The <tt>rcu_state</tt> Structure</a></h3>
+
+<p>The <tt>rcu_state</tt> structure is the base structure that
+represents a flavor of RCU.
+This structure forms the interconnection between the
+<tt>rcu_node</tt> and <tt>rcu_data</tt> structures,
+tracks grace periods, contains the lock used to
+synchronize with CPU-hotplug events,
+and maintains state used to force quiescent states when
+grace periods extend too long.
+
+</p><p>A few of the <tt>rcu_state</tt> structure's fields are discussed,
+singly and in groups, in the following sections.
+The more specialized fields are covered in the discussion of their
+use.
+
+<h5>Relationship to rcu_node and rcu_data Structures</h5>
+
+This portion of the <tt>rcu_state</tt> structure is declared
+as follows:
+
+<pre>
+ 1 struct rcu_node node[NUM_RCU_NODES];
+ 2 struct rcu_node *level[NUM_RCU_LVLS + 1];
+ 3 struct rcu_data __percpu *rda;
+</pre>
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Wait a minute!
+ You said that the <tt>rcu_node</tt> structures formed a tree,
+ but they are declared as a flat array!
+ What gives?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ The tree is laid out in the array.
+ The first node in the array is the head, the next set of nodes in the
+ array are children of the head node, and so on until the last set of
+ nodes in the array are the leaves.
+ </font>
+
+ <p><font color="ffffff">See the following diagrams to see how
+ this works.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>The <tt>rcu_node</tt> tree is embedded into the
+<tt>-&gt;node[]</tt> array as shown in the following figure:
+
+</p><p><img src="TreeMapping.svg" alt="TreeMapping.svg" width="40%">
+
+</p><p>One interesting consequence of this mapping is that a
+breadth-first traversal of the tree is implemented as a simple
+linear scan of the array, which is in fact what the
+<tt>rcu_for_each_node_breadth_first()</tt> macro does.
+This macro is used at the beginnings and ends of grace periods.
+
+</p><p>Each entry of the <tt>-&gt;level</tt> array references
+the first <tt>rcu_node</tt> structure on the corresponding level
+of the tree, for example, as shown below:
+
+</p><p><img src="TreeMappingLevel.svg" alt="TreeMappingLevel.svg" width="40%">
+
+</p><p>The zero<sup>th</sup> element of the array references the root
+<tt>rcu_node</tt> structure, the first element references the
+first child of the root <tt>rcu_node</tt>, and finally the second
+element references the first leaf <tt>rcu_node</tt> structure.
+
+</p><p>For whatever it is worth, if you draw the tree to be tree-shaped
+rather than array-shaped, it is easy to draw a planar representation:
+
+</p><p><img src="TreeLevel.svg" alt="TreeLevel.svg" width="60%">
+
+</p><p>Finally, the <tt>-&gt;rda</tt> field references a per-CPU
+pointer to the corresponding CPU's <tt>rcu_data</tt> structure.
+
+</p><p>All of these fields are constant once initialization is complete,
+and therefore need no protection.
+
+<h5>Grace-Period Tracking</h5>
+
+<p>This portion of the <tt>rcu_state</tt> structure is declared
+as follows:
+
+<pre>
+ 1 unsigned long gpnum;
+ 2 unsigned long completed;
+</pre>
+
+<p>RCU grace periods are numbered, and
+the <tt>-&gt;gpnum</tt> field contains the number of the grace
+period that started most recently.
+The <tt>-&gt;completed</tt> field contains the number of the
+grace period that completed most recently.
+If the two fields are equal, the RCU grace period that most recently
+started has already completed, and therefore the corresponding
+flavor of RCU is idle.
+If <tt>-&gt;gpnum</tt> is one greater than <tt>-&gt;completed</tt>,
+then <tt>-&gt;gpnum</tt> gives the number of the current RCU
+grace period, which has not yet completed.
+Any other combination of values indicates that something is broken.
+These two fields are protected by the root <tt>rcu_node</tt>'s
+<tt>-&gt;lock</tt> field.
+
+</p><p>There are <tt>-&gt;gpnum</tt> and <tt>-&gt;completed</tt> fields
+in the <tt>rcu_node</tt> and <tt>rcu_data</tt> structures
+as well.
+The fields in the <tt>rcu_state</tt> structure represent the
+most current values, and those of the other structures are compared
+in order to detect the start of a new grace period in a distributed
+fashion.
+The values flow from <tt>rcu_state</tt> to <tt>rcu_node</tt>
+(down the tree from the root to the leaves) to <tt>rcu_data</tt>.
+
+<h5>Miscellaneous</h5>
+
+<p>This portion of the <tt>rcu_state</tt> structure is declared
+as follows:
+
+<pre>
+ 1 unsigned long gp_max;
+ 2 char abbr;
+ 3 char *name;
+</pre>
+
+<p>The <tt>-&gt;gp_max</tt> field tracks the duration of the longest
+grace period in jiffies.
+It is protected by the root <tt>rcu_node</tt>'s <tt>-&gt;lock</tt>.
+
+<p>The <tt>-&gt;name</tt> field points to the name of the RCU flavor
+(for example, &ldquo;rcu_sched&rdquo;), and is constant.
+The <tt>-&gt;abbr</tt> field contains a one-character abbreviation,
+for example, &ldquo;s&rdquo; for RCU-sched.
+
+<h3><a name="The rcu_node Structure">
+The <tt>rcu_node</tt> Structure</a></h3>
+
+<p>The <tt>rcu_node</tt> structures form the combining
+tree that propagates quiescent-state
+information from the leaves to the root and also that propagates
+grace-period information from the root down to the leaves.
+They provide local copies of the grace-period state in order
+to allow this information to be accessed in a synchronized
+manner without suffering the scalability limitations that
+would otherwise be imposed by global locking.
+In <tt>CONFIG_PREEMPT_RCU</tt> kernels, they manage the lists
+of tasks that have blocked while in their current
+RCU read-side critical section.
+In <tt>CONFIG_PREEMPT_RCU</tt> with
+<tt>CONFIG_RCU_BOOST</tt>, they manage the
+per-<tt>rcu_node</tt> priority-boosting
+kernel threads (kthreads) and state.
+Finally, they record CPU-hotplug state in order to determine
+which CPUs should be ignored during a given grace period.
+
+</p><p>The <tt>rcu_node</tt> structure's fields are discussed,
+singly and in groups, in the following sections.
+
+<h5>Connection to Combining Tree</h5>
+
+<p>This portion of the <tt>rcu_node</tt> structure is declared
+as follows:
+
+<pre>
+ 1 struct rcu_node *parent;
+ 2 u8 level;
+ 3 u8 grpnum;
+ 4 unsigned long grpmask;
+ 5 int grplo;
+ 6 int grphi;
+</pre>
+
+<p>The <tt>-&gt;parent</tt> pointer references the <tt>rcu_node</tt>
+one level up in the tree, and is <tt>NULL</tt> for the root
+<tt>rcu_node</tt>.
+The RCU implementation makes heavy use of this field to push quiescent
+states up the tree.
+The <tt>-&gt;level</tt> field gives the level in the tree, with
+the root being at level zero, its children at level one, and so on.
+The <tt>-&gt;grpnum</tt> field gives this node's position within
+the children of its parent, so this number can range between 0 and 31
+on 32-bit systems and between 0 and 63 on 64-bit systems.
+The <tt>-&gt;level</tt> and <tt>-&gt;grpnum</tt> fields are
+used only during initialization and for tracing.
+The <tt>-&gt;grpmask</tt> field is the bitmask counterpart of
+<tt>-&gt;grpnum</tt>, and therefore always has exactly one bit set.
+This mask is used to clear the bit corresponding to this <tt>rcu_node</tt>
+structure in its parent's bitmasks, which are described later.
+Finally, the <tt>-&gt;grplo</tt> and <tt>-&gt;grphi</tt> fields
+contain the lowest and highest numbered CPU served by this
+<tt>rcu_node</tt> structure, respectively.
+
+</p><p>All of these fields are constant, and thus do not require any
+synchronization.
+
+<h5>Synchronization</h5>
+
+<p>This field of the <tt>rcu_node</tt> structure is declared
+as follows:
+
+<pre>
+ 1 raw_spinlock_t lock;
+</pre>
+
+<p>This field is used to protect the remaining fields in this structure,
+unless otherwise stated.
+That said, all of the fields in this structure can be accessed without
+locking for tracing purposes.
+Yes, this can result in confusing traces, but better some tracing confusion
+than to be heisenbugged out of existence.
+
+<h5>Grace-Period Tracking</h5>
+
+<p>This portion of the <tt>rcu_node</tt> structure is declared
+as follows:
+
+<pre>
+ 1 unsigned long gpnum;
+ 2 unsigned long completed;
+</pre>
+
+<p>These fields are the counterparts of the fields of the same name in
+the <tt>rcu_state</tt> structure.
+They each may lag up to one behind their <tt>rcu_state</tt>
+counterparts.
+If a given <tt>rcu_node</tt> structure's <tt>-&gt;gpnum</tt> and
+<tt>-&gt;completed</tt> fields are equal, then this <tt>rcu_node</tt>
+structure believes that RCU is idle.
+Otherwise, as with the <tt>rcu_state</tt> structure,
+the <tt>-&gt;gpnum</tt> field will be one greater than the
+<tt>-&gt;completed</tt> field, with <tt>-&gt;gpnum</tt>
+indicating which grace period this <tt>rcu_node</tt> believes
+is still being waited for.
+
+</p><p>The <tt>-&gt;gpnum</tt> field of each <tt>rcu_node</tt>
+structure is updated at the beginning
+of each grace period, and the <tt>-&gt;completed</tt> fields are
+updated at the end of each grace period.
+
+<h5>Quiescent-State Tracking</h5>
+
+<p>These fields manage the propagation of quiescent states up the
+combining tree.
+
+</p><p>This portion of the <tt>rcu_node</tt> structure has fields
+as follows:
+
+<pre>
+ 1 unsigned long qsmask;
+ 2 unsigned long expmask;
+ 3 unsigned long qsmaskinit;
+ 4 unsigned long expmaskinit;
+</pre>
+
+<p>The <tt>-&gt;qsmask</tt> field tracks which of this
+<tt>rcu_node</tt> structure's children still need to report
+quiescent states for the current normal grace period.
+Such children will have a value of 1 in their corresponding bit.
+Note that the leaf <tt>rcu_node</tt> structures should be
+thought of as having <tt>rcu_data</tt> structures as their
+children.
+Similarly, the <tt>-&gt;expmask</tt> field tracks which
+of this <tt>rcu_node</tt> structure's children still need to report
+quiescent states for the current expedited grace period.
+An expedited grace period has
+the same conceptual properties as a normal grace period, but the
+expedited implementation accepts extreme CPU overhead to obtain
+much lower grace-period latency, for example, consuming a few
+tens of microseconds worth of CPU time to reduce grace-period
+duration from milliseconds to tens of microseconds.
+The <tt>-&gt;qsmaskinit</tt> field tracks which of this
+<tt>rcu_node</tt> structure's children cover for at least
+one online CPU.
+This mask is used to initialize <tt>-&gt;qsmask</tt>,
+and <tt>-&gt;expmaskinit</tt> is used to initialize
+<tt>-&gt;expmask</tt> at the beginning of the
+normal and expedited grace periods, respectively.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Why are these bitmasks protected by locking?
+ Come on, haven't you heard of atomic instructions???
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Lockless grace-period computation! Such a tantalizing possibility!
+ </font>
+
+ <p><font color="ffffff">But consider the following sequence of events:
+ </font>
+
+ <ol>
+ <li> <font color="ffffff">CPU&nbsp;0 has been in dyntick-idle
+ mode for quite some time.
+ When it wakes up, it notices that the current RCU
+ grace period needs it to report in, so it sets a
+ flag where the scheduling clock interrupt will find it.
+ </font><p>
+ <li> <font color="ffffff">Meanwhile, CPU&nbsp;1 is running
+ <tt>force_quiescent_state()</tt>,
+ and notices that CPU&nbsp;0 has been in dyntick idle mode,
+ which qualifies as an extended quiescent state.
+ </font><p>
+ <li> <font color="ffffff">CPU&nbsp;0's scheduling clock
+ interrupt fires in the
+ middle of an RCU read-side critical section, and notices
+ that the RCU core needs something, so commences RCU softirq
+ processing.
+ </font>
+ <p>
+ <li> <font color="ffffff">CPU&nbsp;0's softirq handler
+ executes and is just about ready
+ to report its quiescent state up the <tt>rcu_node</tt>
+ tree.
+ </font><p>
+ <li> <font color="ffffff">But CPU&nbsp;1 beats it to the punch,
+ completing the current
+ grace period and starting a new one.
+ </font><p>
+ <li> <font color="ffffff">CPU&nbsp;0 now reports its quiescent
+ state for the wrong
+ grace period.
+ That grace period might now end before the RCU read-side
+ critical section.
+ If that happens, disaster will ensue.
+ </font>
+ </ol>
+
+ <p><font color="ffffff">So the locking is absolutely required in
+ order to coordinate
+ clearing of the bits with the grace-period numbers in
+ <tt>-&gt;gpnum</tt> and <tt>-&gt;completed</tt>.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<h5>Blocked-Task Management</h5>
+
+<p><tt>PREEMPT_RCU</tt> allows tasks to be preempted in the
+midst of their RCU read-side critical sections, and these tasks
+must be tracked explicitly.
+The details of exactly why and how they are tracked will be covered
+in a separate article on RCU read-side processing.
+For now, it is enough to know that the <tt>rcu_node</tt>
+structure tracks them.
+
+<pre>
+ 1 struct list_head blkd_tasks;
+ 2 struct list_head *gp_tasks;
+ 3 struct list_head *exp_tasks;
+ 4 bool wait_blkd_tasks;
+</pre>
+
+<p>The <tt>-&gt;blkd_tasks</tt> field is a list header for
+the list of blocked and preempted tasks.
+As tasks undergo context switches within RCU read-side critical
+sections, their <tt>task_struct</tt> structures are enqueued
+(via the <tt>task_struct</tt>'s <tt>-&gt;rcu_node_entry</tt>
+field) onto the head of the <tt>-&gt;blkd_tasks</tt> list for the
+leaf <tt>rcu_node</tt> structure corresponding to the CPU
+on which the outgoing context switch executed.
+As these tasks later exit their RCU read-side critical sections,
+they remove themselves from the list.
+This list is therefore in reverse time order, so that if one of the tasks
+is blocking the current grace period, all subsequent tasks must
+also be blocking that same grace period.
+Therefore, a single pointer into this list suffices to track
+all tasks blocking a given grace period.
+That pointer is stored in <tt>-&gt;gp_tasks</tt> for normal
+grace periods and in <tt>-&gt;exp_tasks</tt> for expedited
+grace periods.
+These last two fields are <tt>NULL</tt> if either there is
+no grace period in flight or if there are no blocked tasks
+preventing that grace period from completing.
+If either of these two pointers is referencing a task that
+removes itself from the <tt>-&gt;blkd_tasks</tt> list,
+then that task must advance the pointer to the next task on
+the list, or set the pointer to <tt>NULL</tt> if there
+are no subsequent tasks on the list.
+
+</p><p>For example, suppose that tasks&nbsp;T1, T2, and&nbsp;T3 are
+all hard-affinitied to the largest-numbered CPU in the system.
+Then if task&nbsp;T1 blocked in an RCU read-side
+critical section, then an expedited grace period started,
+then task&nbsp;T2 blocked in an RCU read-side critical section,
+then a normal grace period started, and finally task&nbsp;T3 blocked
+in an RCU read-side critical section, then the state of the
+last leaf <tt>rcu_node</tt> structure's blocked-task list
+would be as shown below:
+
+</p><p><img src="blkd_task.svg" alt="blkd_task.svg" width="60%">
+
+</p><p>Task&nbsp;T1 is blocking both grace periods, task&nbsp;T2 is
+blocking only the normal grace period, and task&nbsp;T3 is blocking
+neither grace period.
+Note that these tasks will not remove themselves from this list
+immediately upon resuming execution.
+They will instead remain on the list until they execute the outermost
+<tt>rcu_read_unlock()</tt> that ends their RCU read-side critical
+section.
+
+<p>
+The <tt>-&gt;wait_blkd_tasks</tt> field indicates whether or not
+the current grace period is waiting on a blocked task.
+
+<h5>Sizing the <tt>rcu_node</tt> Array</h5>
+
+<p>The <tt>rcu_node</tt> array is sized via a series of
+C-preprocessor expressions as follows:
+
+<pre>
+ 1 #ifdef CONFIG_RCU_FANOUT
+ 2 #define RCU_FANOUT CONFIG_RCU_FANOUT
+ 3 #else
+ 4 # ifdef CONFIG_64BIT
+ 5 # define RCU_FANOUT 64
+ 6 # else
+ 7 # define RCU_FANOUT 32
+ 8 # endif
+ 9 #endif
+10
+11 #ifdef CONFIG_RCU_FANOUT_LEAF
+12 #define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF
+13 #else
+14 # ifdef CONFIG_64BIT
+15 # define RCU_FANOUT_LEAF 64
+16 # else
+17 # define RCU_FANOUT_LEAF 32
+18 # endif
+19 #endif
+20
+21 #define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
+22 #define RCU_FANOUT_2 (RCU_FANOUT_1 * RCU_FANOUT)
+23 #define RCU_FANOUT_3 (RCU_FANOUT_2 * RCU_FANOUT)
+24 #define RCU_FANOUT_4 (RCU_FANOUT_3 * RCU_FANOUT)
+25
+26 #if NR_CPUS &lt;= RCU_FANOUT_1
+27 # define RCU_NUM_LVLS 1
+28 # define NUM_RCU_LVL_0 1
+29 # define NUM_RCU_NODES NUM_RCU_LVL_0
+30 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0 }
+31 # define RCU_NODE_NAME_INIT { "rcu_node_0" }
+32 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0" }
+33 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0" }
+34 #elif NR_CPUS &lt;= RCU_FANOUT_2
+35 # define RCU_NUM_LVLS 2
+36 # define NUM_RCU_LVL_0 1
+37 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+38 # define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1)
+39 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
+40 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1" }
+41 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1" }
+42 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1" }
+43 #elif NR_CPUS &lt;= RCU_FANOUT_3
+44 # define RCU_NUM_LVLS 3
+45 # define NUM_RCU_LVL_0 1
+46 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+47 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+48 # define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2)
+49 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
+50 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
+51 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
+52 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
+53 #elif NR_CPUS &lt;= RCU_FANOUT_4
+54 # define RCU_NUM_LVLS 4
+55 # define NUM_RCU_LVL_0 1
+56 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
+57 # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
+58 # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
+59 # define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3)
+60 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
+61 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
+62 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
+63 # define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
+64 #else
+65 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
+66 #endif
+</pre>
+
+<p>The maximum number of levels in the <tt>rcu_node</tt> tree
+is currently limited to four, as specified by lines&nbsp;21-24
+and the structure of the subsequent &ldquo;if&rdquo; statement.
+For 32-bit systems, this allows 16*32*32*32=524,288 CPUs, which
+should be sufficient for the next few years at least.
+For 64-bit systems, 16*64*64*64=4,194,304 CPUs is allowed, which
+should see us through the next decade or so.
+This four-level tree also allows kernels built with
+<tt>CONFIG_RCU_FANOUT=8</tt> to support up to 4096 CPUs,
+which might be useful in very large systems having eight CPUs per
+socket (but please note that no one has yet shown any measurable
+performance degradation due to misaligned socket and <tt>rcu_node</tt>
+boundaries).
+In addition, building kernels with a full four levels of <tt>rcu_node</tt>
+tree permits better testing of RCU's combining-tree code.
+
+</p><p>The <tt>RCU_FANOUT</tt> symbol controls how many children
+are permitted at each non-leaf level of the <tt>rcu_node</tt> tree.
+If the <tt>CONFIG_RCU_FANOUT</tt> Kconfig option is not specified,
+it is set based on the word size of the system, which is also
+the Kconfig default.
+
+</p><p>The <tt>RCU_FANOUT_LEAF</tt> symbol controls how many CPUs are
+handled by each leaf <tt>rcu_node</tt> structure.
+Experience has shown that allowing a given leaf <tt>rcu_node</tt>
+structure to handle 64 CPUs, as permitted by the number of bits in
+the <tt>-&gt;qsmask</tt> field on a 64-bit system, results in
+excessive contention for the leaf <tt>rcu_node</tt> structures'
+<tt>-&gt;lock</tt> fields.
+The number of CPUs per leaf <tt>rcu_node</tt> structure is therefore
+limited to 16 given the default value of <tt>CONFIG_RCU_FANOUT_LEAF</tt>.
+If <tt>CONFIG_RCU_FANOUT_LEAF</tt> is unspecified, the value
+selected is based on the word size of the system, just as for
+<tt>CONFIG_RCU_FANOUT</tt>.
+Lines&nbsp;11-19 perform this computation.
+
+</p><p>Lines&nbsp;21-24 compute the maximum number of CPUs supported by
+a single-level (which contains a single <tt>rcu_node</tt> structure),
+two-level, three-level, and four-level <tt>rcu_node</tt> tree,
+respectively, given the fanout specified by <tt>RCU_FANOUT</tt>
+and <tt>RCU_FANOUT_LEAF</tt>.
+These numbers of CPUs are retained in the
+<tt>RCU_FANOUT_1</tt>,
+<tt>RCU_FANOUT_2</tt>,
+<tt>RCU_FANOUT_3</tt>, and
+<tt>RCU_FANOUT_4</tt>
+C-preprocessor variables, respectively.
+
+</p><p>These variables are used to control the C-preprocessor <tt>#if</tt>
+statement spanning lines&nbsp;26-66 that computes the number of
+<tt>rcu_node</tt> structures required for each level of the tree,
+as well as the number of levels required.
+The number of levels is placed in the <tt>RCU_NUM_LVLS</tt>
+C-preprocessor variable by lines&nbsp;27, 35, 44, and&nbsp;54.
+The number of <tt>rcu_node</tt> structures for the topmost level
+of the tree is always exactly one, and this value is unconditionally
+placed into <tt>NUM_RCU_LVL_0</tt> by lines&nbsp;28, 36, 45, and&nbsp;55.
+The rest of the levels (if any) of the <tt>rcu_node</tt> tree
+are computed by dividing the maximum number of CPUs by the
+fanout supported by the number of levels from the current level down,
+rounding up. This computation is performed by lines&nbsp;37,
+46-47, and&nbsp;56-58.
+Lines&nbsp;31-33, 40-42, 50-52, and&nbsp;61-63 create initializers
+for lockdep lock-class names.
+Finally, lines&nbsp;64-66 produce an error if the maximum number of
+CPUs is too large for the specified fanout.
+
+<h3><a name="The rcu_data Structure">
+The <tt>rcu_data</tt> Structure</a></h3>
+
+<p>The <tt>rcu_data</tt> structure maintains the per-CPU state for the
+corresponding flavor of RCU.
+The fields in this structure may be accessed only from the corresponding
+CPU (and from tracing) unless otherwise stated.
+This structure is the
+focus of quiescent-state detection and RCU callback queuing.
+It also tracks its relationship to the corresponding leaf
+<tt>rcu_node</tt> structure to allow more-efficient
+propagation of quiescent states up the <tt>rcu_node</tt>
+combining tree.
+Like the <tt>rcu_node</tt> structure, it provides a local
+copy of the grace-period information to allow for-free
+synchronized
+access to this information from the corresponding CPU.
+Finally, this structure records past dyntick-idle state
+for the corresponding CPU and also tracks statistics.
+
+</p><p>The <tt>rcu_data</tt> structure's fields are discussed,
+singly and in groups, in the following sections.
+
+<h5>Connection to Other Data Structures</h5>
+
+<p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+ 1 int cpu;
+ 2 struct rcu_state *rsp;
+ 3 struct rcu_node *mynode;
+ 4 struct rcu_dynticks *dynticks;
+ 5 unsigned long grpmask;
+ 6 bool beenonline;
+</pre>
+
+<p>The <tt>-&gt;cpu</tt> field contains the number of the
+corresponding CPU, the <tt>-&gt;rsp</tt> pointer references
+the corresponding <tt>rcu_state</tt> structure (and is most frequently
+used to locate the name of the corresponding flavor of RCU for tracing),
+and the <tt>-&gt;mynode</tt> field references the corresponding
+<tt>rcu_node</tt> structure.
+The <tt>-&gt;mynode</tt> is used to propagate quiescent states
+up the combining tree.
+<p>The <tt>-&gt;dynticks</tt> pointer references the
+<tt>rcu_dynticks</tt> structure corresponding to this
+CPU.
+Recall that a single per-CPU instance of the <tt>rcu_dynticks</tt>
+structure is shared among all flavors of RCU.
+These first four fields are constant and therefore require no
+synchronization.
+
+</p><p>The <tt>-&gt;grpmask</tt> field indicates the bit in
+the <tt>-&gt;mynode-&gt;qsmask</tt> corresponding to this
+<tt>rcu_data</tt> structure, and is also used when propagating
+quiescent states.
+The <tt>-&gt;beenonline</tt> flag is set whenever the corresponding
+CPU comes online, which means that the debugfs tracing need not dump
+out any <tt>rcu_data</tt> structure for which this flag is not set.
+
+<h5>Quiescent-State and Grace-Period Tracking</h5>
+
+<p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+ 1 unsigned long completed;
+ 2 unsigned long gpnum;
+ 3 bool cpu_no_qs;
+ 4 bool core_needs_qs;
+ 5 bool gpwrap;
+ 6 unsigned long rcu_qs_ctr_snap;
+</pre>
+
+<p>The <tt>completed</tt> and <tt>gpnum</tt>
+fields are the counterparts of the fields of the same name
+in the <tt>rcu_state</tt> and <tt>rcu_node</tt> structures.
+They may each lag up to one behind their <tt>rcu_node</tt>
+counterparts, but in <tt>CONFIG_NO_HZ_IDLE</tt> and
+<tt>CONFIG_NO_HZ_FULL</tt> kernels can lag
+arbitrarily far behind for CPUs in dyntick-idle mode (but these counters
+will catch up upon exit from dyntick-idle mode).
+If a given <tt>rcu_data</tt> structure's <tt>-&gt;gpnum</tt> and
+<tt>-&gt;completed</tt> fields are equal, then this <tt>rcu_data</tt>
+structure believes that RCU is idle.
+Otherwise, as with the <tt>rcu_state</tt> and <tt>rcu_node</tt>
+structures,
+the <tt>-&gt;gpnum</tt> field will be one greater than the
+<tt>-&gt;completed</tt> field, with <tt>-&gt;gpnum</tt>
+indicating which grace period this <tt>rcu_data</tt> believes
+is still being waited for.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ All this replication of the grace period numbers can only cause
+ massive confusion.
+ Why not just keep a global pair of counters and be done with it???
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Because if there was only a single global pair of grace-period
+ numbers, there would need to be a single global lock to allow
+ safely accessing and updating them.
+ And if we are not going to have a single global lock, we need
+ to carefully manage the numbers on a per-node basis.
+ Recall from the answer to a previous Quick Quiz that the consequences
+ of applying a previously sampled quiescent state to the wrong
+ grace period are quite severe.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>The <tt>-&gt;cpu_no_qs</tt> flag indicates that the
+CPU has not yet passed through a quiescent state,
+while the <tt>-&gt;core_needs_qs</tt> flag indicates that the
+RCU core needs a quiescent state from the corresponding CPU.
+The <tt>-&gt;gpwrap</tt> field indicates that the corresponding
+CPU has remained idle for so long that the <tt>completed</tt>
+and <tt>gpnum</tt> counters are in danger of overflow, which
+will cause the CPU to disregard the values of its counters on
+its next exit from idle.
+Finally, the <tt>rcu_qs_ctr_snap</tt> field is used to detect
+cases where a given operation has resulted in a quiescent state
+for all flavors of RCU, for example, <tt>cond_resched_rcu_qs()</tt>.
+
+<h5>RCU Callback Handling</h5>
+
+<p>In the absence of CPU-hotplug events, RCU callbacks are invoked by
+the same CPU that registered them.
+This is strictly a cache-locality optimization: callbacks can and
+do get invoked on CPUs other than the one that registered them.
+After all, if the CPU that registered a given callback has gone
+offline before the callback can be invoked, there really is no other
+choice.
+
+</p><p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+ 1 struct rcu_head *nxtlist;
+ 2 struct rcu_head **nxttail[RCU_NEXT_SIZE];
+ 3 unsigned long nxtcompleted[RCU_NEXT_SIZE];
+ 4 long qlen_lazy;
+ 5 long qlen;
+ 6 long qlen_last_fqs_check;
+ 7 unsigned long n_force_qs_snap;
+ 8 unsigned long n_cbs_invoked;
+ 9 unsigned long n_cbs_orphaned;
+10 unsigned long n_cbs_adopted;
+11 long blimit;
+</pre>
+
+<p>The <tt>-&gt;nxtlist</tt> pointer and the
+<tt>-&gt;nxttail[]</tt> array form a four-segment list with
+older callbacks near the head and newer ones near the tail.
+Each segment contains callbacks with the corresponding relationship
+to the current grace period.
+The pointer out of the end of each of the four segments is referenced
+by the element of the <tt>-&gt;nxttail[]</tt> array indexed by
+<tt>RCU_DONE_TAIL</tt> (for callbacks handled by a prior grace period),
+<tt>RCU_WAIT_TAIL</tt> (for callbacks waiting on the current grace period),
+<tt>RCU_NEXT_READY_TAIL</tt> (for callbacks that will wait on the next
+grace period), and
+<tt>RCU_NEXT_TAIL</tt> (for callbacks that are not yet associated
+with a specific grace period)
+respectively, as shown in the following figure.
+
+</p><p><img src="nxtlist.svg" alt="nxtlist.svg" width="40%">
+
+</p><p>In this figure, the <tt>-&gt;nxtlist</tt> pointer references the
+first
+RCU callback in the list.
+The <tt>-&gt;nxttail[RCU_DONE_TAIL]</tt> array element references
+the <tt>-&gt;nxtlist</tt> pointer itself, indicating that none
+of the callbacks is ready to invoke.
+The <tt>-&gt;nxttail[RCU_WAIT_TAIL]</tt> array element references callback
+CB&nbsp;2's <tt>-&gt;next</tt> pointer, which indicates that
+CB&nbsp;1 and CB&nbsp;2 are both waiting on the current grace period.
+The <tt>-&gt;nxttail[RCU_NEXT_READY_TAIL]</tt> array element
+references the same RCU callback that <tt>-&gt;nxttail[RCU_WAIT_TAIL]</tt>
+does, which indicates that there are no callbacks waiting on the next
+RCU grace period.
+The <tt>-&gt;nxttail[RCU_NEXT_TAIL]</tt> array element references
+CB&nbsp;4's <tt>-&gt;next</tt> pointer, indicating that all the
+remaining RCU callbacks have not yet been assigned to an RCU grace
+period.
+Note that the <tt>-&gt;nxttail[RCU_NEXT_TAIL]</tt> array element
+always references the last RCU callback's <tt>-&gt;next</tt> pointer
+unless the callback list is empty, in which case it references
+the <tt>-&gt;nxtlist</tt> pointer.
+
+</p><p>CPUs advance their callbacks from the
+<tt>RCU_NEXT_TAIL</tt> to the <tt>RCU_NEXT_READY_TAIL</tt> to the
+<tt>RCU_WAIT_TAIL</tt> to the <tt>RCU_DONE_TAIL</tt> list segments
+as grace periods advance.
+The CPU advances the callbacks in its <tt>rcu_data</tt> structure
+whenever it notices that another RCU grace period has completed.
+The CPU detects the completion of an RCU grace period by noticing
+that the value of its <tt>rcu_data</tt> structure's
+<tt>-&gt;completed</tt> field differs from that of its leaf
+<tt>rcu_node</tt> structure.
+Recall that each <tt>rcu_node</tt> structure's
+<tt>-&gt;completed</tt> field is updated at the end of each
+grace period.
+
+</p><p>The <tt>-&gt;nxtcompleted[]</tt> array records grace-period
+numbers corresponding to the list segments.
+This allows CPUs that go idle for extended periods to determine
+which of their callbacks are ready to be invoked after reawakening.
+
+</p><p>The <tt>-&gt;qlen</tt> counter contains the number of
+callbacks in <tt>-&gt;nxtlist</tt>, and the
+<tt>-&gt;qlen_lazy</tt> counter contains the number of those callbacks that
+are known to only free memory, and whose invocation can therefore
+be safely deferred.
+The <tt>-&gt;qlen_last_fqs_check</tt> and
+<tt>-&gt;n_force_qs_snap</tt> coordinate the forcing of quiescent
+states from <tt>call_rcu()</tt> and friends when callback
+lists grow excessively long.
+
+</p><p>The <tt>-&gt;n_cbs_invoked</tt>,
+<tt>-&gt;n_cbs_orphaned</tt>, and <tt>-&gt;n_cbs_adopted</tt>
+fields count the number of callbacks invoked,
+sent to other CPUs when this CPU goes offline,
+and received from other CPUs when those other CPUs go offline.
+Finally, the <tt>-&gt;blimit</tt> counter is the maximum number of
+RCU callbacks that may be invoked at a given time.
+
+<h5>Dyntick-Idle Handling</h5>
+
+<p>This portion of the <tt>rcu_data</tt> structure is declared
+as follows:
+
+<pre>
+ 1 int dynticks_snap;
+ 2 unsigned long dynticks_fqs;
+</pre>
+
+<p>The <tt>-&gt;dynticks_snap</tt> field is used to take a snapshot
+of the corresponding CPU's dyntick-idle state when forcing
+quiescent states, and is therefore accessed from other CPUs.
+Finally, the <tt>-&gt;dynticks_fqs</tt> field is used to
+count the number of times this CPU is determined to be in
+dyntick-idle state, and is used for tracing and debugging purposes.
+
+<h3><a name="The rcu_dynticks Structure">
+The <tt>rcu_dynticks</tt> Structure</a></h3>
+
+<p>The <tt>rcu_dynticks</tt> structure maintains the per-CPU dyntick-idle state
+for the corresponding CPU.
+Unlike the other structures, <tt>rcu_dynticks</tt> is not
+replicated over the different flavors of RCU.
+The fields in this structure may be accessed only from the corresponding
+CPU (and from tracing) unless otherwise stated.
+Its fields are as follows:
+
+<pre>
+ 1 int dynticks_nesting;
+ 2 int dynticks_nmi_nesting;
+ 3 atomic_t dynticks;
+</pre>
+
+<p>The <tt>-&gt;dynticks_nesting</tt> field counts the
+nesting depth of normal interrupts.
+In addition, this counter is incremented when exiting dyntick-idle
+mode and decremented when entering it.
+This counter can therefore be thought of as counting the number
+of reasons why this CPU cannot be permitted to enter dyntick-idle
+mode, aside from non-maskable interrupts (NMIs).
+NMIs are counted by the <tt>-&gt;dynticks_nmi_nesting</tt>
+field, except that NMIs that interrupt non-dyntick-idle execution
+are not counted.
+
+</p><p>Finally, the <tt>-&gt;dynticks</tt> field counts the corresponding
+CPU's transitions to and from dyntick-idle mode, so that this counter
+has an even value when the CPU is in dyntick-idle mode and an odd
+value otherwise.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Why not just count all NMIs?
+ Wouldn't that be simpler and less error prone?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ It seems simpler only until you think hard about how to go about
+ updating the <tt>rcu_dynticks</tt> structure's
+ <tt>-&gt;dynticks</tt> field.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>Additional fields are present for some special-purpose
+builds, and are discussed separately.
+
+<h3><a name="The rcu_head Structure">
+The <tt>rcu_head</tt> Structure</a></h3>
+
+<p>Each <tt>rcu_head</tt> structure represents an RCU callback.
+These structures are normally embedded within RCU-protected data
+structures whose algorithms use asynchronous grace periods.
+In contrast, when using algorithms that block waiting for RCU grace periods,
+RCU users need not provide <tt>rcu_head</tt> structures.
+
+</p><p>The <tt>rcu_head</tt> structure has fields as follows:
+
+<pre>
+ 1 struct rcu_head *next;
+ 2 void (*func)(struct rcu_head *head);
+</pre>
+
+<p>The <tt>-&gt;next</tt> field is used
+to link the <tt>rcu_head</tt> structures together in the
+lists within the <tt>rcu_data</tt> structures.
+The <tt>-&gt;func</tt> field is a pointer to the function
+to be called when the callback is ready to be invoked, and
+this function is passed a pointer to the <tt>rcu_head</tt>
+structure.
+However, <tt>kfree_rcu()</tt> uses the <tt>-&gt;func</tt>
+field to record the offset of the <tt>rcu_head</tt>
+structure within the enclosing RCU-protected data structure.
+
+</p><p>Both of these fields are used internally by RCU.
+From the viewpoint of RCU users, this structure is an
+opaque &ldquo;cookie&rdquo;.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Given that the callback function <tt>-&gt;func</tt>
+ is passed a pointer to the <tt>rcu_head</tt> structure,
+ how is that function supposed to find the beginning of the
+ enclosing RCU-protected data structure?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ In actual practice, there is a separate callback function per
+ type of RCU-protected data structure.
+ The callback function can therefore use the <tt>container_of()</tt>
+ macro in the Linux kernel (or other pointer-manipulation facilities
+ in other software environments) to find the beginning of the
+ enclosing structure.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<h3><a name="RCU-Specific Fields in the task_struct Structure">
+RCU-Specific Fields in the <tt>task_struct</tt> Structure</a></h3>
+
+<p>The <tt>CONFIG_PREEMPT_RCU</tt> implementation uses some
+additional fields in the <tt>task_struct</tt> structure:
+
+<pre>
+ 1 #ifdef CONFIG_PREEMPT_RCU
+ 2 int rcu_read_lock_nesting;
+ 3 union rcu_special rcu_read_unlock_special;
+ 4 struct list_head rcu_node_entry;
+ 5 struct rcu_node *rcu_blocked_node;
+ 6 #endif /* #ifdef CONFIG_PREEMPT_RCU */
+ 7 #ifdef CONFIG_TASKS_RCU
+ 8 unsigned long rcu_tasks_nvcsw;
+ 9 bool rcu_tasks_holdout;
+10 struct list_head rcu_tasks_holdout_list;
+11 int rcu_tasks_idle_cpu;
+12 #endif /* #ifdef CONFIG_TASKS_RCU */
+</pre>
+
+<p>The <tt>-&gt;rcu_read_lock_nesting</tt> field records the
+nesting level for RCU read-side critical sections, and
+the <tt>-&gt;rcu_read_unlock_special</tt> field is a bitmask
+that records special conditions that require <tt>rcu_read_unlock()</tt>
+to do additional work.
+The <tt>-&gt;rcu_node_entry</tt> field is used to form lists of
+tasks that have blocked within preemptible-RCU read-side critical
+sections and the <tt>-&gt;rcu_blocked_node</tt> field references
+the <tt>rcu_node</tt> structure whose list this task is a member of,
+or <tt>NULL</tt> if it is not blocked within a preemptible-RCU
+read-side critical section.
+
+<p>The <tt>-&gt;rcu_tasks_nvcsw</tt> field tracks the number of
+voluntary context switches that this task had undergone at the
+beginning of the current tasks-RCU grace period,
+<tt>-&gt;rcu_tasks_holdout</tt> is set if the current tasks-RCU
+grace period is waiting on this task, <tt>-&gt;rcu_tasks_holdout_list</tt>
+is a list element enqueuing this task on the holdout list,
+and <tt>-&gt;rcu_tasks_idle_cpu</tt> tracks which CPU this
+idle task is running on, but only if the task is currently running,
+that is, if the CPU is currently idle.
+
+<h3><a name="Accessor Functions">
+Accessor Functions</a></h3>
+
+<p>The following listing shows the
+<tt>rcu_get_root()</tt>, <tt>rcu_for_each_node_breadth_first()</tt>,
+<tt>rcu_for_each_nonleaf_node_breadth_first()</tt>, and
+<tt>rcu_for_each_leaf_node()</tt> function and macros:
+
+<pre>
+ 1 static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
+ 2 {
+ 3 return &amp;rsp-&gt;node[0];
+ 4 }
+ 5
+ 6 #define rcu_for_each_node_breadth_first(rsp, rnp) \
+ 7 for ((rnp) = &amp;(rsp)-&gt;node[0]; \
+ 8 (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
+ 9
+ 10 #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
+ 11 for ((rnp) = &amp;(rsp)-&gt;node[0]; \
+ 12 (rnp) &lt; (rsp)-&gt;level[NUM_RCU_LVLS - 1]; (rnp)++)
+ 13
+ 14 #define rcu_for_each_leaf_node(rsp, rnp) \
+ 15 for ((rnp) = (rsp)-&gt;level[NUM_RCU_LVLS - 1]; \
+ 16 (rnp) &lt; &amp;(rsp)-&gt;node[NUM_RCU_NODES]; (rnp)++)
+</pre>
+
+<p>The <tt>rcu_get_root()</tt> function simply returns a pointer to the
+first element of the specified <tt>rcu_state</tt> structure's
+<tt>-&gt;node[]</tt> array, which is the root <tt>rcu_node</tt>
+structure.
+
+</p><p>As noted earlier, the <tt>rcu_for_each_node_breadth_first()</tt>
+macro takes advantage of the layout of the <tt>rcu_node</tt>
+structures in the <tt>rcu_state</tt> structure's
+<tt>-&gt;node[]</tt> array, performing a breadth-first traversal by
+simply traversing the array in order.
+The <tt>rcu_for_each_nonleaf_node_breadth_first()</tt> macro operates
+similarly, but traverses only the first part of the array, thus excluding
+the leaf <tt>rcu_node</tt> structures.
+Finally, the <tt>rcu_for_each_leaf_node()</tt> macro traverses only
+the last part of the array, thus traversing only the leaf
+<tt>rcu_node</tt> structures.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ What do <tt>rcu_for_each_nonleaf_node_breadth_first()</tt> and
+ <tt>rcu_for_each_leaf_node()</tt> do if the <tt>rcu_node</tt> tree
+ contains only a single node?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ In the single-node case,
+ <tt>rcu_for_each_nonleaf_node_breadth_first()</tt> is a no-op
+ and <tt>rcu_for_each_leaf_node()</tt> traverses the single node.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<h3><a name="Summary">
+Summary</a></h3>
+
+<p>So each flavor of RCU is represented by an <tt>rcu_state</tt> structure,
+which contains a combining tree of <tt>rcu_node</tt> and
+<tt>rcu_data</tt> structures.
+Finally, in <tt>CONFIG_NO_HZ_IDLE</tt> kernels, each CPU's dyntick-idle
+state is tracked by an <tt>rcu_dynticks</tt> structure.
+
+<p>If you made it this far, you are well prepared to read the code
+walkthroughs in the other articles in this series.
+
+<h3><a name="Acknowledgments">
+Acknowledgments</a></h3>
+
+<p>I owe thanks to Cyrill Gorcunov, Mathieu Desnoyers, Dhaval Giani, Paul
+Turner, Abhishek Srivastava, Matt Kowalczyk, and Serge Hallyn
+for helping me get this document into a more human-readable state.
+
+<h3><a name="Legal Statement">
+Legal Statement</a></h3>
+
+<p>This work represents the view of the author and does not necessarily
+represent the view of IBM.
+
+</p><p>Linux is a registered trademark of Linus Torvalds.
+
+</p><p>Other company, product, and service names may be trademarks or
+service marks of others.
+
+</body></html>
diff --git a/Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg b/Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg
new file mode 100644
index 000000000000..2bf12b468206
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/HugeTreeClassicRCU.svg
@@ -0,0 +1,939 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:37:22 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="15.1in"
+ height="11.2in"
+ viewBox="-66 -66 18087 13407"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="HugeTreeClassicRCU.fig">
+ <metadata
+ id="metadata224">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs222">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3982"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="1134"
+ inkscape:window-height="789"
+ id="namedview220"
+ showgrid="false"
+ inkscape:zoom="0.60515873"
+ inkscape:cx="679.5"
+ inkscape:cy="504"
+ inkscape:window-x="786"
+ inkscape:window-y="24"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="450"
+ y="0"
+ width="17100"
+ height="8325"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="11025"
+ y="3600"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="4275"
+ y="3600"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="5400"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="9900"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect14" />
+ <!-- Line: box -->
+ <rect
+ x="14400"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect16" />
+ <!-- Line: box -->
+ <rect
+ x="900"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect18" />
+ <!-- Line: box -->
+ <rect
+ x="7650"
+ y="900"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect20" />
+ <!-- Line -->
+ <polyline
+ points="3150,9225 3150,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline22" />
+ <!-- Arrowhead on XXXpoint 3150 9225 - 3150 7560-->
+ <!-- Circle -->
+ <circle
+ cx="8550"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle26" />
+ <!-- Circle -->
+ <circle
+ cx="9000"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle28" />
+ <!-- Circle -->
+ <circle
+ cx="9450"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle30" />
+ <!-- Line -->
+ <polyline
+ points="6750,6300 8250,5010 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline32" />
+ <!-- Arrowhead on XXXpoint 6750 6300 - 8391 4890-->
+ <!-- Line -->
+ <polyline
+ points="11250,6300 9747,5010 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline36" />
+ <!-- Arrowhead on XXXpoint 11250 6300 - 9606 4890-->
+ <!-- Circle -->
+ <circle
+ cx="13950"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle40" />
+ <!-- Circle -->
+ <circle
+ cx="13500"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle42" />
+ <!-- Circle -->
+ <circle
+ cx="13050"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle44" />
+ <!-- Circle -->
+ <circle
+ cx="9450"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle46" />
+ <!-- Circle -->
+ <circle
+ cx="9000"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle48" />
+ <!-- Circle -->
+ <circle
+ cx="8550"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle50" />
+ <!-- Circle -->
+ <circle
+ cx="4950"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle52" />
+ <!-- Circle -->
+ <circle
+ cx="4500"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle54" />
+ <!-- Circle -->
+ <circle
+ cx="4050"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle56" />
+ <!-- Circle -->
+ <circle
+ cx="1800"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle58" />
+ <!-- Circle -->
+ <circle
+ cx="2250"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle60" />
+ <!-- Circle -->
+ <circle
+ cx="2700"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle62" />
+ <!-- Circle -->
+ <circle
+ cx="15300"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle64" />
+ <!-- Circle -->
+ <circle
+ cx="15750"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle66" />
+ <!-- Circle -->
+ <circle
+ cx="16200"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle68" />
+ <!-- Circle -->
+ <circle
+ cx="10800"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle70" />
+ <!-- Circle -->
+ <circle
+ cx="11250"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle72" />
+ <!-- Circle -->
+ <circle
+ cx="11700"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle74" />
+ <!-- Circle -->
+ <circle
+ cx="6300"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle76" />
+ <!-- Circle -->
+ <circle
+ cx="6750"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle78" />
+ <!-- Circle -->
+ <circle
+ cx="7200"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle80" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="11475"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect82" />
+ <!-- Line: box -->
+ <rect
+ x="1800"
+ y="9225"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect84" />
+ <!-- Line: box -->
+ <rect
+ x="4500"
+ y="11475"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect86" />
+ <!-- Line: box -->
+ <rect
+ x="6300"
+ y="9270"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect88" />
+ <!-- Line: box -->
+ <rect
+ x="8955"
+ y="11475"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect90" />
+ <!-- Line: box -->
+ <rect
+ x="10755"
+ y="9270"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect92" />
+ <!-- Line: box -->
+ <rect
+ x="13455"
+ y="11475"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect94" />
+ <!-- Line: box -->
+ <rect
+ x="15255"
+ y="9270"
+ width="2700"
+ height="1800"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect96" />
+ <!-- Line -->
+ <polyline
+ points="11700,3600 10197,2310 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline98" />
+ <!-- Arrowhead on XXXpoint 11700 3600 - 10056 2190-->
+ <!-- Line -->
+ <polyline
+ points="6300,3600 7800,2310 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline102" />
+ <!-- Arrowhead on XXXpoint 6300 3600 - 7941 2190-->
+ <!-- Line -->
+ <polyline
+ points="3150,6300 4650,5010 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline106" />
+ <!-- Arrowhead on XXXpoint 3150 6300 - 4791 4890-->
+ <!-- Line -->
+ <polyline
+ points="14850,6300 13347,5010 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline110" />
+ <!-- Arrowhead on XXXpoint 14850 6300 - 13206 4890-->
+ <!-- Line -->
+ <polyline
+ points="1350,11475 1350,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline114" />
+ <!-- Arrowhead on XXXpoint 1350 11475 - 1350 7560-->
+ <!-- Line -->
+ <polyline
+ points="16650,9225 16650,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline118" />
+ <!-- Arrowhead on XXXpoint 16650 9225 - 16650 7560-->
+ <!-- Line -->
+ <polyline
+ points="14850,11475 14850,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline122" />
+ <!-- Arrowhead on XXXpoint 14850 11475 - 14850 7560-->
+ <!-- Line -->
+ <polyline
+ points="12150,9225 12150,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline126" />
+ <!-- Arrowhead on XXXpoint 12150 9225 - 12150 7560-->
+ <!-- Line -->
+ <polyline
+ points="10350,11475 10350,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline130" />
+ <!-- Arrowhead on XXXpoint 10350 11475 - 10350 7560-->
+ <!-- Line -->
+ <polyline
+ points="7650,9225 7650,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline134" />
+ <!-- Arrowhead on XXXpoint 7650 9225 - 7650 7560-->
+ <!-- Line -->
+ <polyline
+ points="5850,11475 5850,7746 "
+ style="stroke:#00d1d1;stroke-width:44.99790066;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline138" />
+ <!-- Arrowhead on XXXpoint 5850 11475 - 5850 7560-->
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12375"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text142">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12375"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text144">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5625"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text146">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5625"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text148">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6750"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text150">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6750"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text152">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11250"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text154">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11250"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text156">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="15750"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text158">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="15750"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text160">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text162">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text164">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1350"
+ y="13050"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text166">CPU 0</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1350"
+ y="11925"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text168">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1350"
+ y="12375"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text170">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="10800"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text172">CPU 15</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="9675"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text174">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="10125"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text176">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5850"
+ y="11925"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text178">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5850"
+ y="12375"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text180">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5850"
+ y="13050"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text182">CPU 21823</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7650"
+ y="10845"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text184">CPU 21839</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7650"
+ y="10170"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text186">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7650"
+ y="9720"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text188">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="10305"
+ y="11925"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text190">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="10305"
+ y="12375"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text192">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="10305"
+ y="13050"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text194">CPU 43679</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12105"
+ y="10845"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text196">CPU 43695</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12105"
+ y="10170"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text198">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12105"
+ y="9720"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text200">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="14805"
+ y="11925"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text202">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="14805"
+ y="12375"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text204">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="14805"
+ y="13050"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text206">CPU 65519</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="16605"
+ y="10845"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text208">CPU 65535</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="16605"
+ y="10170"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text210">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="16605"
+ y="9720"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text212">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="675"
+ y="450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="start"
+ id="text214">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="9000"
+ y="1350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text216">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="9000"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text218">rcu_node</text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/TreeLevel.svg b/Documentation/RCU/Design/Data-Structures/TreeLevel.svg
new file mode 100644
index 000000000000..7a7eb3bac95c
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/TreeLevel.svg
@@ -0,0 +1,828 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:41:29 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="17.7in"
+ height="10.4in"
+ viewBox="-66 -66 21237 12507"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="TreeLevel.fig">
+ <metadata
+ id="metadata216">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs214">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3974"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="1023"
+ inkscape:window-height="1148"
+ id="namedview212"
+ showgrid="false"
+ inkscape:zoom="0.55869424"
+ inkscape:cx="796.50006"
+ inkscape:cy="467.99997"
+ inkscape:window-x="897"
+ inkscape:window-y="24"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="0"
+ width="20655"
+ height="8325"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="14130"
+ y="3600"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="7380"
+ y="3600"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="8505"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="13005"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect14" />
+ <!-- Line: box -->
+ <rect
+ x="17505"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect16" />
+ <!-- Line: box -->
+ <rect
+ x="4005"
+ y="6300"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect18" />
+ <!-- Line: box -->
+ <rect
+ x="10755"
+ y="900"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect20" />
+ <!-- Line -->
+ <polyline
+ points="6255,9225 6255,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline22" />
+ <!-- Arrowhead on XXXpoint 6255 9225 - 6255 7560-->
+ <!-- Circle -->
+ <circle
+ cx="11655"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle26" />
+ <!-- Circle -->
+ <circle
+ cx="12105"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle28" />
+ <!-- Circle -->
+ <circle
+ cx="12555"
+ cy="4275"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle30" />
+ <!-- Line -->
+ <polyline
+ points="9855,6300 11355,5010 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline32" />
+ <!-- Arrowhead on XXXpoint 9855 6300 - 11496 4890-->
+ <!-- Line -->
+ <polyline
+ points="14355,6300 12852,5010 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline36" />
+ <!-- Arrowhead on XXXpoint 14355 6300 - 12711 4890-->
+ <!-- Circle -->
+ <circle
+ cx="17055"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle40" />
+ <!-- Circle -->
+ <circle
+ cx="16605"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle42" />
+ <!-- Circle -->
+ <circle
+ cx="16155"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle44" />
+ <!-- Circle -->
+ <circle
+ cx="12555"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle46" />
+ <!-- Circle -->
+ <circle
+ cx="12105"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle48" />
+ <!-- Circle -->
+ <circle
+ cx="11655"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle50" />
+ <!-- Circle -->
+ <circle
+ cx="8055"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle52" />
+ <!-- Circle -->
+ <circle
+ cx="7605"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle54" />
+ <!-- Circle -->
+ <circle
+ cx="7155"
+ cy="6975"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle56" />
+ <!-- Circle -->
+ <circle
+ cx="4905"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle58" />
+ <!-- Circle -->
+ <circle
+ cx="5355"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle60" />
+ <!-- Circle -->
+ <circle
+ cx="5805"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle62" />
+ <!-- Circle -->
+ <circle
+ cx="18405"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle64" />
+ <!-- Circle -->
+ <circle
+ cx="18855"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle66" />
+ <!-- Circle -->
+ <circle
+ cx="19305"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle68" />
+ <!-- Circle -->
+ <circle
+ cx="13905"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle70" />
+ <!-- Circle -->
+ <circle
+ cx="14355"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle72" />
+ <!-- Circle -->
+ <circle
+ cx="14805"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle74" />
+ <!-- Circle -->
+ <circle
+ cx="9405"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle76" />
+ <!-- Circle -->
+ <circle
+ cx="9855"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle78" />
+ <!-- Circle -->
+ <circle
+ cx="10305"
+ cy="8775"
+ r="114"
+ style="fill:#000000;stroke:#000000;stroke-width:21;"
+ id="circle80" />
+ <!-- Line: box -->
+ <rect
+ x="225"
+ y="1125"
+ width="3150"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:21; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect82" />
+ <!-- Line: box -->
+ <rect
+ x="225"
+ y="2250"
+ width="3150"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:21; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect84" />
+ <!-- Line: box -->
+ <rect
+ x="225"
+ y="3375"
+ width="3150"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:21; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect86" />
+ <!-- Line -->
+ <polyline
+ points="14805,3600 13302,2310 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline88" />
+ <!-- Arrowhead on XXXpoint 14805 3600 - 13161 2190-->
+ <!-- Line -->
+ <polyline
+ points="9405,3600 10905,2310 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline92" />
+ <!-- Arrowhead on XXXpoint 9405 3600 - 11046 2190-->
+ <!-- Line -->
+ <polyline
+ points="6255,6300 7755,5010 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline96" />
+ <!-- Arrowhead on XXXpoint 6255 6300 - 7896 4890-->
+ <!-- Line -->
+ <polyline
+ points="17955,6300 16452,5010 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline100" />
+ <!-- Arrowhead on XXXpoint 17955 6300 - 16311 4890-->
+ <!-- Line -->
+ <polyline
+ points="4455,11025 4455,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline104" />
+ <!-- Arrowhead on XXXpoint 4455 11025 - 4455 7560-->
+ <!-- Line -->
+ <polyline
+ points="19755,9225 19755,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline108" />
+ <!-- Arrowhead on XXXpoint 19755 9225 - 19755 7560-->
+ <!-- Line -->
+ <polyline
+ points="17955,11025 17955,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline112" />
+ <!-- Arrowhead on XXXpoint 17955 11025 - 17955 7560-->
+ <!-- Line -->
+ <polyline
+ points="15255,9225 15255,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline116" />
+ <!-- Arrowhead on XXXpoint 15255 9225 - 15255 7560-->
+ <!-- Line -->
+ <polyline
+ points="13455,11025 13455,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline120" />
+ <!-- Arrowhead on XXXpoint 13455 11025 - 13455 7560-->
+ <!-- Line -->
+ <polyline
+ points="10755,9225 10755,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline124" />
+ <!-- Arrowhead on XXXpoint 10755 9225 - 10755 7560-->
+ <!-- Line -->
+ <polyline
+ points="8955,11025 8955,7746 "
+ style="stroke:#00d1d1;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline128" />
+ <!-- Arrowhead on XXXpoint 8955 11025 - 8955 7560-->
+ <!-- Line: box -->
+ <rect
+ x="12105"
+ y="11025"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect132" />
+ <!-- Line: box -->
+ <rect
+ x="13905"
+ y="9225"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect134" />
+ <!-- Line: box -->
+ <rect
+ x="16605"
+ y="11025"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect136" />
+ <!-- Line: box -->
+ <rect
+ x="18405"
+ y="9225"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect138" />
+ <!-- Line: box -->
+ <rect
+ x="9405"
+ y="9225"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect140" />
+ <!-- Line: box -->
+ <rect
+ x="7605"
+ y="11025"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect142" />
+ <!-- Line: box -->
+ <rect
+ x="4905"
+ y="9225"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect144" />
+ <!-- Line: box -->
+ <rect
+ x="3105"
+ y="11025"
+ width="2700"
+ height="1350"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect146" />
+ <!-- Line -->
+ <polyline
+ points="3375,1575 10701,1575 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline148" />
+ <!-- Arrowhead on XXXpoint 3375 1575 - 10890 1575-->
+ <!-- Line -->
+ <polyline
+ points="3375,3825 4050,3825 4050,5400 2700,5400 2700,6975 3951,6975 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline152" />
+ <!-- Arrowhead on XXXpoint 2700 6975 - 4140 6975-->
+ <!-- Line -->
+ <polyline
+ points="3375,2700 5175,2700 5175,4275 7326,4275 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline156" />
+ <!-- Arrowhead on XXXpoint 5175 4275 - 7515 4275-->
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="15480"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text160">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="15480"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text162">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8730"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text164">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8730"
+ y="4500"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text166">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="9855"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text168">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="9855"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text170">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="14355"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text172">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="14355"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text174">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="18855"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text176">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="18855"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text178">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5355"
+ y="6750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text180">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5355"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text182">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text184">-&gt;level[0]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="2925"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text186">-&gt;level[1]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text188">-&gt;level[2]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12105"
+ y="1350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text190">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="12105"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="middle"
+ id="text192">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6255"
+ y="10125"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text194">CPU 15</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4455"
+ y="11925"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text196">CPU 0</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="19755"
+ y="10125"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text198">CPU 65535</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="17955"
+ y="11925"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text200">CPU 65519</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="15255"
+ y="10125"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text202">CPU 43695</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="13455"
+ y="11925"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text204">CPU 43679</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="10755"
+ y="10125"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text206">CPU 21839</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="8955"
+ y="11925"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text208">CPU 21823</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="450"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="288"
+ text-anchor="start"
+ id="text210">struct rcu_state</text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/TreeMapping.svg b/Documentation/RCU/Design/Data-Structures/TreeMapping.svg
new file mode 100644
index 000000000000..729cfa9e6cdb
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/TreeMapping.svg
@@ -0,0 +1,305 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:43:22 2015 -->
+
+<!-- Magnification: 1.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="3.1in"
+ height="0.9in"
+ viewBox="-12 -12 3699 1074"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="TreeMapping.fig">
+ <metadata
+ id="metadata66">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs64">
+ <marker
+ inkscape:stockid="Arrow2Lend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow2Lend"
+ style="overflow:visible;">
+ <path
+ id="path3836"
+ style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+ d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+ transform="scale(1.1) rotate(180) translate(1,0)" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow2Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow2Mend"
+ style="overflow:visible;">
+ <path
+ id="path3842"
+ style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+ d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+ transform="scale(0.6) rotate(180) translate(0,0)" />
+ </marker>
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3824"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="991"
+ inkscape:window-height="606"
+ id="namedview62"
+ showgrid="false"
+ inkscape:zoom="3.0752688"
+ inkscape:cx="139.5"
+ inkscape:cy="40.5"
+ inkscape:window-x="891"
+ inkscape:window-y="177"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="0"
+ width="3675"
+ height="1050"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="75"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="600"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="1125"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="1650"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect14" />
+ <!-- Line: box -->
+ <rect
+ x="2175"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect16" />
+ <!-- Line: box -->
+ <rect
+ x="3225"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect18" />
+ <!-- Line -->
+ <polyline
+ points="675,375 675,150 300,150 300,358 "
+ style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline20" />
+ <!-- Arrowhead on XXXpoint 300 150 - 300 390-->
+ <!-- Line -->
+ <polyline
+ points="1200,675 1200,900 300,900 300,691 "
+ style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline24" />
+ <!-- Arrowhead on XXXpoint 300 900 - 300 660-->
+ <!-- Line -->
+ <polyline
+ points="1725,375 1725,150 900,150 900,358 "
+ style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline28" />
+ <!-- Arrowhead on XXXpoint 900 150 - 900 390-->
+ <!-- Line -->
+ <polyline
+ points="2250,375 2250,75 825,75 825,358 "
+ style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline32" />
+ <!-- Arrowhead on XXXpoint 825 75 - 825 390-->
+ <!-- Line -->
+ <polyline
+ points="2775,675 2775,900 1425,900 1425,691 "
+ style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline36" />
+ <!-- Arrowhead on XXXpoint 1425 900 - 1425 660-->
+ <!-- Line -->
+ <polyline
+ points="3300,675 3300,975 1350,975 1350,691 "
+ style="stroke:#000000;stroke-width:7.00088889;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline40" />
+ <!-- Arrowhead on XXXpoint 1350 975 - 1350 660-->
+ <!-- Line: box -->
+ <rect
+ x="2700"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect44" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="300"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text46">0:7 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1350"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text48">4:7 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1875"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text50">0:1 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text52">2:3 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2925"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text54">4:5 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3450"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text56">6:7 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="825"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text58">0:3 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3600"
+ y="150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="end"
+ id="text60">struct rcu_state</text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg b/Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg
new file mode 100644
index 000000000000..5b416a4b8453
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/TreeMappingLevel.svg
@@ -0,0 +1,380 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:45:19 2015 -->
+
+<!-- Magnification: 1.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="3.1in"
+ height="1.8in"
+ viewBox="-12 -12 3699 2124"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="TreeMappingLevel.svg">
+ <metadata
+ id="metadata98">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title />
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs96">
+ <marker
+ inkscape:stockid="Arrow2Lend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow2Lend"
+ style="overflow:visible;">
+ <path
+ id="path3868"
+ style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
+ d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
+ transform="scale(1.1) rotate(180) translate(1,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="1598"
+ inkscape:window-height="1211"
+ id="namedview94"
+ showgrid="false"
+ inkscape:zoom="5.2508961"
+ inkscape:cx="139.5"
+ inkscape:cy="81"
+ inkscape:window-x="840"
+ inkscape:window-y="122"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="0"
+ width="3675"
+ height="2100"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="75"
+ y="1350"
+ width="750"
+ height="225"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="75"
+ y="1575"
+ width="750"
+ height="225"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="75"
+ y="1800"
+ width="750"
+ height="225"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect12" />
+ <!-- Arc -->
+ <path
+ style="stroke:#000000;stroke-width:7;stroke-linecap:butt;"
+ d="M 1800,900 A 118 118 0 0 0 1800 1125 "
+ id="path14" />
+ <!-- Arc -->
+ <path
+ style="stroke:#000000;stroke-width:7;stroke-linecap:butt;"
+ d="M 750,900 A 75 75 0 0 0 750 1050 "
+ id="path16" />
+ <!-- Line -->
+ <polyline
+ points="750,900 750,691 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline18" />
+ <!-- Arrowhead on XXXpoint 750 900 - 750 660-->
+ <!-- Line: box -->
+ <rect
+ x="75"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect22" />
+ <!-- Line: box -->
+ <rect
+ x="600"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect24" />
+ <!-- Line: box -->
+ <rect
+ x="1650"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect26" />
+ <!-- Line: box -->
+ <rect
+ x="2175"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect28" />
+ <!-- Line: box -->
+ <rect
+ x="3225"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect30" />
+ <!-- Line -->
+ <polyline
+ points="675,375 675,150 300,150 300,358 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline32" />
+ <!-- Arrowhead on XXXpoint 300 150 - 300 390-->
+ <!-- Line -->
+ <polyline
+ points="1725,375 1725,150 900,150 900,358 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline36" />
+ <!-- Arrowhead on XXXpoint 900 150 - 900 390-->
+ <!-- Line -->
+ <polyline
+ points="2250,375 2250,75 825,75 825,358 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline40" />
+ <!-- Arrowhead on XXXpoint 825 75 - 825 390-->
+ <!-- Line -->
+ <polyline
+ points="2775,675 2775,975 1425,975 1425,691 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline44" />
+ <!-- Arrowhead on XXXpoint 1425 975 - 1425 660-->
+ <!-- Line: box -->
+ <rect
+ x="2700"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect48" />
+ <!-- Line: box -->
+ <rect
+ x="1125"
+ y="375"
+ width="375"
+ height="300"
+ rx="0"
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect50" />
+ <!-- Line -->
+ <polyline
+ points="3300,675 3300,1050 1350,1050 1350,691 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline52" />
+ <!-- Arrowhead on XXXpoint 1350 1050 - 1350 660-->
+ <!-- Line -->
+ <polyline
+ points="825,1425 975,1425 975,1200 225,1200 225,691 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline56" />
+ <!-- Arrowhead on XXXpoint 225 1200 - 225 660-->
+ <!-- Line -->
+ <polyline
+ points="1200,675 1200,975 300,975 300,691 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline60" />
+ <!-- Arrowhead on XXXpoint 300 975 - 300 660-->
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="150"
+ y="1500"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="108"
+ text-anchor="start"
+ id="text64">-&gt;level[0]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="150"
+ y="1725"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="108"
+ text-anchor="start"
+ id="text66">-&gt;level[1]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="150"
+ y="1950"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="108"
+ text-anchor="start"
+ id="text68">-&gt;level[2]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="300"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text70">0:7 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1350"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text72">4:7 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1875"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text74">0:1 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text76">2:3 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2925"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text78">4:5 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3450"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text80">6:7 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="825"
+ y="525"
+ fill="#000000"
+ font-family="Times"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="middle"
+ id="text82">0:3 </text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3600"
+ y="150"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="normal"
+ font-size="96"
+ text-anchor="end"
+ id="text84">struct rcu_state</text>
+ <!-- Line -->
+ <polyline
+ points="825,1875 1800,1875 1800,1125 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:none"
+ id="polyline86" />
+ <!-- Line -->
+ <polyline
+ points="1800,900 1800,691 "
+ style="stroke:#000000;stroke-width:7.00025806;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow2Lend)"
+ id="polyline88" />
+ <!-- Arrowhead on XXXpoint 1800 900 - 1800 660-->
+ <!-- Line -->
+ <polyline
+ points="825,1650 1200,1650 1200,1125 750,1125 750,1050 "
+ style="stroke:#000000;stroke-width:7; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline92" />
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/blkd_task.svg b/Documentation/RCU/Design/Data-Structures/blkd_task.svg
new file mode 100644
index 000000000000..00e810bb8419
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/blkd_task.svg
@@ -0,0 +1,843 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:35:03 2015 -->
+
+<!-- Magnification: 2.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="10.1in"
+ height="8.6in"
+ viewBox="-44 -44 12088 10288"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="blkd_task.fig">
+ <metadata
+ id="metadata212">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs210">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3970"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="1087"
+ inkscape:window-height="1144"
+ id="namedview208"
+ showgrid="false"
+ inkscape:zoom="1.0495049"
+ inkscape:cx="454.50003"
+ inkscape:cy="387.00003"
+ inkscape:window-x="833"
+ inkscape:window-y="28"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="450"
+ y="0"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="4950"
+ y="4950"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="600"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect10" />
+ <!-- Line -->
+ <polyline
+ points="5250,8100 5688,5912 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline12" />
+ <!-- Arrowhead on XXXpoint 5250 8100 - 5710 5790-->
+ <polyline
+ points="5714 6068 5704 5822 5598 6044 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline14" />
+ <!-- Line -->
+ <polyline
+ points="4050,9300 4486,7262 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline16" />
+ <!-- Arrowhead on XXXpoint 4050 9300 - 4512 7140-->
+ <polyline
+ points="4514 7418 4506 7172 4396 7394 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline18" />
+ <!-- Line -->
+ <polyline
+ points="1040,9300 1476,7262 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline20" />
+ <!-- Arrowhead on XXXpoint 1040 9300 - 1502 7140-->
+ <polyline
+ points="1504 7418 1496 7172 1386 7394 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline22" />
+ <!-- Line -->
+ <polyline
+ points="2240,8100 2676,6062 "
+ style="stroke:#00ff00;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="polyline24" />
+ <!-- Arrowhead on XXXpoint 2240 8100 - 2702 5940-->
+ <polyline
+ points="2704 6218 2696 5972 2586 6194 "
+ style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8; "
+ id="polyline26" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="450"
+ width="6300"
+ height="7350"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffffff; "
+ id="rect28" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="1050"
+ width="5700"
+ height="3750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffff00; "
+ id="rect30" />
+ <!-- Line -->
+ <polyline
+ points="1350,3450 2350,2590 "
+ style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline32" />
+ <!-- Arrowhead on XXXpoint 1350 3450 - 2444 2510-->
+ <!-- Line -->
+ <polyline
+ points="4950,3450 3948,2590 "
+ style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline36" />
+ <!-- Arrowhead on XXXpoint 4950 3450 - 3854 2510-->
+ <!-- Line -->
+ <polyline
+ points="4050,6600 4050,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline40" />
+ <!-- Arrowhead on XXXpoint 4050 6600 - 4050 4290-->
+ <!-- Line -->
+ <polyline
+ points="1050,6600 1050,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline44" />
+ <!-- Arrowhead on XXXpoint 1050 6600 - 1050 4290-->
+ <!-- Line -->
+ <polyline
+ points="2250,5400 2250,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline48" />
+ <!-- Arrowhead on XXXpoint 2250 5400 - 2250 4290-->
+ <!-- Line -->
+ <polyline
+ points="2250,8100 2250,6364 "
+ style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline52" />
+ <!-- Arrowhead on XXXpoint 2250 8100 - 2250 6240-->
+ <!-- Line -->
+ <polyline
+ points="1050,9300 1050,7564 "
+ style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline56" />
+ <!-- Arrowhead on XXXpoint 1050 9300 - 1050 7440-->
+ <!-- Line -->
+ <polyline
+ points="4050,9300 4050,7564 "
+ style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline60" />
+ <!-- Arrowhead on XXXpoint 4050 9300 - 4050 7440-->
+ <!-- Line -->
+ <polyline
+ points="5250,8100 5250,6364 "
+ style="stroke:#00ff00;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline64" />
+ <!-- Arrowhead on XXXpoint 5250 8100 - 5250 6240-->
+ <!-- Circle -->
+ <circle
+ cx="2850"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle68" />
+ <!-- Circle -->
+ <circle
+ cx="3150"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle70" />
+ <!-- Circle -->
+ <circle
+ cx="3450"
+ cy="3900"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle72" />
+ <!-- Circle -->
+ <circle
+ cx="1350"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle74" />
+ <!-- Circle -->
+ <circle
+ cx="1650"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle76" />
+ <!-- Circle -->
+ <circle
+ cx="1950"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle78" />
+ <!-- Circle -->
+ <circle
+ cx="4350"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle80" />
+ <!-- Circle -->
+ <circle
+ cx="4650"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle82" />
+ <!-- Circle -->
+ <circle
+ cx="4950"
+ cy="5100"
+ r="76"
+ style="fill:#000000;stroke:#000000;stroke-width:14;"
+ id="circle84" />
+ <!-- Line: box -->
+ <rect
+ x="750"
+ y="3450"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect86" />
+ <!-- Line: box -->
+ <rect
+ x="300"
+ y="6600"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect88" />
+ <!-- Line: box -->
+ <rect
+ x="4500"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect90" />
+ <!-- Line: box -->
+ <rect
+ x="3300"
+ y="6600"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect92" />
+ <!-- Line: box -->
+ <rect
+ x="2250"
+ y="1650"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect94" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="9300"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect96" />
+ <!-- Line: box -->
+ <rect
+ x="1350"
+ y="8100"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect98" />
+ <!-- Line: box -->
+ <rect
+ x="3000"
+ y="9300"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect100" />
+ <!-- Line: box -->
+ <rect
+ x="4350"
+ y="8100"
+ width="2100"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#00ff00; "
+ id="rect102" />
+ <!-- Line: box -->
+ <rect
+ x="1500"
+ y="5400"
+ width="1500"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect104" />
+ <!-- Line -->
+ <polygon
+ points="5550,3450 7350,2850 7350,5100 5550,4350 5550,3450 "
+ style="stroke:#000000;stroke-width:14; stroke-linejoin:miter; stroke-linecap:butt; stroke-dasharray:120 120;fill:#ffbfbf; "
+ id="polygon106" />
+ <!-- Line -->
+ <polyline
+ points="9300,3150 10734,3150 "
+ style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline108" />
+ <!-- Arrowhead on XXXpoint 9300 3150 - 10860 3150-->
+ <!-- Line: box -->
+ <rect
+ x="10800"
+ y="2850"
+ width="1200"
+ height="750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect112" />
+ <!-- Line -->
+ <polyline
+ points="11400,3600 11400,4284 "
+ style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline114" />
+ <!-- Arrowhead on XXXpoint 11400 3600 - 11400 4410-->
+ <!-- Line: box -->
+ <rect
+ x="10800"
+ y="4350"
+ width="1200"
+ height="750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect118" />
+ <!-- Line -->
+ <polyline
+ points="11400,5100 11400,5784 "
+ style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline120" />
+ <!-- Arrowhead on XXXpoint 11400 5100 - 11400 5910-->
+ <!-- Line: box -->
+ <rect
+ x="10800"
+ y="5850"
+ width="1200"
+ height="750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect124" />
+ <!-- Line -->
+ <polyline
+ points="9300,3900 9900,3900 9900,4650 10734,4650 "
+ style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline126" />
+ <!-- Arrowhead on XXXpoint 9900 4650 - 10860 4650-->
+ <!-- Line -->
+ <polyline
+ points="9300,4650 9600,4650 9600,6150 10734,6150 "
+ style="stroke:#000000;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline130" />
+ <!-- Arrowhead on XXXpoint 9600 6150 - 10860 6150-->
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6450"
+ y="300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text134">rcu_bh</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="1950"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text136">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="3150"
+ y="2250"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text138">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="3750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text140">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1650"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text142">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="5700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text144">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2250"
+ y="6000"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text146">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="6900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text148">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text150">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="5700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text152">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5250"
+ y="6000"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text154">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="6900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text156">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="7200"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text158">rcu_data</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="450"
+ y="1350"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text160">struct rcu_state</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="9600"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text162">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="1050"
+ y="9900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text164">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="9600"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text166">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4050"
+ y="9900"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text168">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="8400"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text170">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="2400"
+ y="8700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text172">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="8400"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text174">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="5400"
+ y="8700"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text176">rcu_dynticks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="6000"
+ y="750"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="192"
+ text-anchor="end"
+ id="text178">rcu_sched</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11400"
+ y="3300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="216"
+ text-anchor="middle"
+ id="text180">T3</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11400"
+ y="4800"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="216"
+ text-anchor="middle"
+ id="text182">T2</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11400"
+ y="6300"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="216"
+ text-anchor="middle"
+ id="text184">T1</text>
+ <!-- Line -->
+ <polyline
+ points="5250,5400 5250,4414 "
+ style="stroke:#00d1d1;stroke-width:30.00057884;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline186" />
+ <!-- Arrowhead on XXXpoint 5250 5400 - 5250 4290-->
+ <!-- Line: box -->
+ <rect
+ x="3750"
+ y="3450"
+ width="1800"
+ height="900"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect190" />
+ <!-- Line: box -->
+ <rect
+ x="7350"
+ y="2850"
+ width="1950"
+ height="750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect192" />
+ <!-- Line: box -->
+ <rect
+ x="7350"
+ y="3600"
+ width="1950"
+ height="750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect194" />
+ <!-- Line: box -->
+ <rect
+ x="7350"
+ y="4350"
+ width="1950"
+ height="750"
+ rx="0"
+ style="stroke:#000000;stroke-width:30; stroke-linejoin:miter; stroke-linecap:butt; fill:#ffbfbf; "
+ id="rect196" />
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text198">rcu_node</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="4650"
+ y="3750"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="middle"
+ id="text200">struct</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7500"
+ y="3300"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text202">blkd_tasks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7500"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text204">gp_tasks</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="7500"
+ y="4800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="192"
+ text-anchor="start"
+ id="text206">exp_tasks</text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Data-Structures/nxtlist.svg b/Documentation/RCU/Design/Data-Structures/nxtlist.svg
new file mode 100644
index 000000000000..abc4cc73a097
--- /dev/null
+++ b/Documentation/RCU/Design/Data-Structures/nxtlist.svg
@@ -0,0 +1,396 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Creator: fig2dev Version 3.2 Patchlevel 5e -->
+
+<!-- CreationDate: Wed Dec 9 17:39:46 2015 -->
+
+<!-- Magnification: 3.000 -->
+
+<svg
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:cc="http://creativecommons.org/ns#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+ xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+ width="10.4in"
+ height="10.4in"
+ viewBox="-66 -66 12507 12507"
+ id="svg2"
+ version="1.1"
+ inkscape:version="0.48.4 r9939"
+ sodipodi:docname="nxtlist.fig">
+ <metadata
+ id="metadata94">
+ <rdf:RDF>
+ <cc:Work
+ rdf:about="">
+ <dc:format>image/svg+xml</dc:format>
+ <dc:type
+ rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+ <dc:title></dc:title>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs
+ id="defs92">
+ <marker
+ inkscape:stockid="Arrow1Mend"
+ orient="auto"
+ refY="0.0"
+ refX="0.0"
+ id="Arrow1Mend"
+ style="overflow:visible;">
+ <path
+ id="path3852"
+ d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+ style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;"
+ transform="scale(0.4) rotate(180) translate(10,0)" />
+ </marker>
+ </defs>
+ <sodipodi:namedview
+ pagecolor="#ffffff"
+ bordercolor="#666666"
+ borderopacity="1"
+ objecttolerance="10"
+ gridtolerance="10"
+ guidetolerance="10"
+ inkscape:pageopacity="0"
+ inkscape:pageshadow="2"
+ inkscape:window-width="925"
+ inkscape:window-height="928"
+ id="namedview90"
+ showgrid="false"
+ inkscape:zoom="0.80021373"
+ inkscape:cx="467.99997"
+ inkscape:cy="467.99997"
+ inkscape:window-x="948"
+ inkscape:window-y="73"
+ inkscape:window-maximized="0"
+ inkscape:current-layer="g4" />
+ <g
+ style="stroke-width:.025in; fill:none"
+ id="g4">
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="0"
+ width="7875"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect6" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="1125"
+ width="7875"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect8" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="2250"
+ width="7875"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect10" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="3375"
+ width="7875"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect12" />
+ <!-- Line: box -->
+ <rect
+ x="0"
+ y="4500"
+ width="7875"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; fill:#87cfff; "
+ id="rect14" />
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="0"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect16" />
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="1125"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect18" />
+ <!-- Line -->
+ <polyline
+ points="11475,2250 11475,3276 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline20" />
+ <!-- Arrowhead on XXXpoint 11475 2250 - 11475 3465-->
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="6750"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect24" />
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="7875"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect26" />
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="10125"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect28" />
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="11250"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect30" />
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="3375"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect32" />
+ <!-- Line -->
+ <polyline
+ points="11475,5625 11475,6651 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline34" />
+ <!-- Arrowhead on XXXpoint 11475 5625 - 11475 6840-->
+ <!-- Line -->
+ <polyline
+ points="7875,225 10476,225 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline38" />
+ <!-- Arrowhead on XXXpoint 7875 225 - 10665 225-->
+ <!-- Line -->
+ <polyline
+ points="7875,1350 9675,1350 9675,675 7971,675 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline42" />
+ <!-- Arrowhead on XXXpoint 9675 675 - 7785 675-->
+ <!-- Line -->
+ <polyline
+ points="7875,2475 9675,2475 9675,4725 10476,4725 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline46" />
+ <!-- Arrowhead on XXXpoint 9675 4725 - 10665 4725-->
+ <!-- Line -->
+ <polyline
+ points="7875,3600 9225,3600 9225,5175 10476,5175 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline50" />
+ <!-- Arrowhead on XXXpoint 9225 5175 - 10665 5175-->
+ <!-- Line -->
+ <polyline
+ points="7875,4725 8775,4725 8775,11475 10476,11475 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline54" />
+ <!-- Arrowhead on XXXpoint 8775 11475 - 10665 11475-->
+ <!-- Line: box -->
+ <rect
+ x="10575"
+ y="4500"
+ width="1800"
+ height="1125"
+ rx="0"
+ style="stroke:#000000;stroke-width:45; stroke-linejoin:miter; stroke-linecap:butt; "
+ id="rect58" />
+ <!-- Line -->
+ <polyline
+ points="11475,9000 11475,10026 "
+ style="stroke:#000000;stroke-width:45.00382345;stroke-linejoin:miter;stroke-linecap:butt;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)"
+ id="polyline60" />
+ <!-- Arrowhead on XXXpoint 11475 9000 - 11475 10215-->
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="675"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text64">nxtlist</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="1800"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text66">nxttail[RCU_DONE_TAIL]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="2925"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text68">nxttail[RCU_WAIT_TAIL]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="4050"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text70">nxttail[RCU_NEXT_READY_TAIL]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="225"
+ y="5175"
+ fill="#000000"
+ font-family="Courier"
+ font-style="normal"
+ font-weight="bold"
+ font-size="324"
+ text-anchor="start"
+ id="text72">nxttail[RCU_NEXT_TAIL]</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="675"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text74">CB 1</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="1800"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text76">next</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="7425"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text78">CB 3</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="8550"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text80">next</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="10800"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text82">CB 4</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="11925"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text84">next</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="4050"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text86">CB 2</text>
+ <!-- Text -->
+ <text
+ xml:space="preserve"
+ x="11475"
+ y="5175"
+ fill="#000000"
+ font-family="Helvetica"
+ font-style="normal"
+ font-weight="normal"
+ font-size="324"
+ text-anchor="middle"
+ id="text88">next</text>
+ </g>
+</svg>
diff --git a/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png b/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png
deleted file mode 100644
index 7496a55e4e7b..000000000000
--- a/Documentation/RCU/Design/Requirements/2013-08-is-it-dead.png
+++ /dev/null
Binary files differ
diff --git a/Documentation/RCU/Design/Requirements/RCUApplicability.svg b/Documentation/RCU/Design/Requirements/RCUApplicability.svg
deleted file mode 100644
index ebcbeee391ed..000000000000
--- a/Documentation/RCU/Design/Requirements/RCUApplicability.svg
+++ /dev/null
@@ -1,237 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<!-- Creator: fig2dev Version 3.2 Patchlevel 5d -->
-
-<!-- CreationDate: Tue Mar 4 18:34:25 2014 -->
-
-<!-- Magnification: 3.000 -->
-
-<svg
- xmlns:dc="http://purl.org/dc/elements/1.1/"
- xmlns:cc="http://creativecommons.org/ns#"
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:svg="http://www.w3.org/2000/svg"
- xmlns="http://www.w3.org/2000/svg"
- xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
- xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
- width="1089.1382"
- height="668.21368"
- viewBox="-2121 -36 14554.634 8876.4061"
- id="svg2"
- version="1.1"
- inkscape:version="0.48.3.1 r9886"
- sodipodi:docname="RCUApplicability.svg">
- <metadata
- id="metadata40">
- <rdf:RDF>
- <cc:Work
- rdf:about="">
- <dc:format>image/svg+xml</dc:format>
- <dc:type
- rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
- <dc:title />
- </cc:Work>
- </rdf:RDF>
- </metadata>
- <defs
- id="defs38" />
- <sodipodi:namedview
- pagecolor="#ffffff"
- bordercolor="#666666"
- borderopacity="1"
- objecttolerance="10"
- gridtolerance="10"
- guidetolerance="10"
- inkscape:pageopacity="0"
- inkscape:pageshadow="2"
- inkscape:window-width="849"
- inkscape:window-height="639"
- id="namedview36"
- showgrid="false"
- inkscape:zoom="0.51326165"
- inkscape:cx="544.56912"
- inkscape:cy="334.10686"
- inkscape:window-x="149"
- inkscape:window-y="448"
- inkscape:window-maximized="0"
- inkscape:current-layer="g4"
- fit-margin-top="5"
- fit-margin-left="5"
- fit-margin-right="5"
- fit-margin-bottom="5" />
- <g
- style="fill:none;stroke-width:0.025in"
- id="g4"
- transform="translate(-2043.6828,14.791398)">
- <!-- Line: box -->
- <rect
- x="0"
- y="0"
- width="14400"
- height="8775"
- rx="0"
- style="fill:#ffa1a1;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
- id="rect6" />
- <!-- Line: box -->
- <rect
- x="1350"
- y="0"
- width="11700"
- height="6075"
- rx="0"
- style="fill:#ffff00;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
- id="rect8" />
- <!-- Line: box -->
- <rect
- x="2700"
- y="0"
- width="9000"
- height="4275"
- rx="0"
- style="fill:#00ff00;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
- id="rect10" />
- <!-- Line: box -->
- <rect
- x="4050"
- y="0"
- width="6300"
- height="2475"
- rx="0"
- style="fill:#87cfff;stroke:#000000;stroke-width:21;stroke-linecap:butt;stroke-linejoin:miter"
- id="rect12" />
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="900"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text14"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3017">Read-Mostly, Stale &amp;</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="1350"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text16"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3019">Inconsistent Data OK</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="1800"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text18"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3021">(RCU Works Great!!!)</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="3825"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text20"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3023">(RCU Works Well)</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="3375"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text22"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3025">Read-Mostly, Need Consistent Data</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="5175"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text24"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3027">Read-Write, Need Consistent Data</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="6975"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text26"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- sodipodi:linespacing="125%">Update-Mostly, Need Consistent Data</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="5625"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text28"
- sodipodi:linespacing="125%"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"><tspan
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- id="tspan3029">(RCU Might Be OK...)</tspan></text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="7875"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text30"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- sodipodi:linespacing="125%">(1) Provide Existence Guarantees For Update-Friendly Mechanisms</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="8325"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text32"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- sodipodi:linespacing="125%">(2) Provide Wait-Free Read-Side Primitives for Real-Time Use)</text>
- <!-- Text -->
- <text
- xml:space="preserve"
- x="7200"
- y="7425"
- font-style="normal"
- font-weight="normal"
- font-size="324"
- id="text34"
- style="font-size:427.63009644px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:center;line-height:125%;writing-mode:lr-tb;text-anchor:middle;fill:#000000;font-family:Nimbus Sans L;-inkscape-font-specification:Nimbus Sans L"
- sodipodi:linespacing="125%">(RCU is Very Unlikely to be the Right Tool For The Job, But it Can:</text>
- </g>
-</svg>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index a725f9900ec8..e7e24b3e86e2 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -1,5 +1,3 @@
-<!-- DO NOT HAND EDIT. -->
-<!-- Instead, edit Documentation/RCU/Design/Requirements/Requirements.htmlx and run 'sh htmlqqz.sh Documentation/RCU/Design/Requirements/Requirements' -->
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
@@ -65,8 +63,8 @@ All that aside, here are the categories of currently known RCU requirements:
<p>
This is followed by a <a href="#Summary">summary</a>,
-which is in turn followed by the inevitable
-<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
+however, the answer to each quick quiz immediately follows the quiz.
+Select the big white space with your mouse to see the answer.
<h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
@@ -153,13 +151,27 @@ Therefore, the outcome:
</blockquote>
cannot happen.
-<p><a name="Quick Quiz 1"><b>Quick Quiz 1</b>:</a>
-Wait a minute!
-You said that updaters can make useful forward progress concurrently
-with readers, but pre-existing readers will block
-<tt>synchronize_rcu()</tt>!!!
-Just who are you trying to fool???
-<br><a href="#qq1answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Wait a minute!
+ You said that updaters can make useful forward progress concurrently
+ with readers, but pre-existing readers will block
+ <tt>synchronize_rcu()</tt>!!!
+ Just who are you trying to fool???
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ First, if updaters do not wish to be blocked by readers, they can use
+ <tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
+ be discussed later.
+ Second, even when using <tt>synchronize_rcu()</tt>, the other
+ update-side code does run concurrently with readers, whether
+ pre-existing or not.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<p>
This scenario resembles one of the first uses of RCU in
@@ -210,9 +222,20 @@ to guarantee that <tt>do_something()</tt> never runs concurrently
with <tt>recovery()</tt>, but with little or no synchronization
overhead in <tt>do_something_dlm()</tt>.
-<p><a name="Quick Quiz 2"><b>Quick Quiz 2</b>:</a>
-Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
-<br><a href="#qq2answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Without that extra grace period, memory reordering could result in
+ <tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
+ concurrently with the last bits of <tt>recovery()</tt>.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<p>
In order to avoid fatal problems such as deadlocks,
@@ -332,12 +355,27 @@ It also prevents any number of &ldquo;interesting&rdquo; compiler
optimizations, for example, the use of <tt>gp</tt> as a scratch
location immediately preceding the assignment.
-<p><a name="Quick Quiz 3"><b>Quick Quiz 3</b>:</a>
-But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
-from being reordered.
-Can't that also cause problems?
-<br><a href="#qq3answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
+ two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
+ from being reordered.
+ Can't that also cause problems?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ No, it cannot.
+ The readers cannot see either of these two fields until
+ the assignment to <tt>gp</tt>, by which time both fields are
+ fully initialized.
+ So reordering the assignments
+ to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
+ cause any problems.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<p>
It is tempting to assume that the reader need not do anything special
@@ -494,11 +532,42 @@ The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
code protected by the corresponding update-side lock.
</ol>
-<p><a name="Quick Quiz 4"><b>Quick Quiz 4</b>:</a>
-Without the <tt>rcu_dereference()</tt> or the
-<tt>rcu_access_pointer()</tt>, what destructive optimizations
-might the compiler make use of?
-<br><a href="#qq4answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Without the <tt>rcu_dereference()</tt> or the
+ <tt>rcu_access_pointer()</tt>, what destructive optimizations
+ might the compiler make use of?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Let's start with what happens to <tt>do_something_gp()</tt>
+ if it fails to use <tt>rcu_dereference()</tt>.
+ It could reuse a value formerly fetched from this same pointer.
+ It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
+ manner, resulting in <i>load tearing</i>, in turn resulting in a bytewise
+ mash-up of two distinct pointer values.
+ It might even use value-speculation optimizations, where it makes
+ a wrong guess, but by the time it gets around to checking the
+ value, an update has changed the pointer to match the wrong guess.
+ Too bad about any dereferences that returned pre-initialization garbage
+ in the meantime!
+ </font>
+
+ <p><font color="ffffff">
+ For <tt>remove_gp_synchronous()</tt>, as long as all modifications
+ to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
+ the above optimizations are harmless.
+ However,
+ with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
+ <tt>sparse</tt> will complain if you
+ define <tt>gp</tt> with <tt>__rcu</tt> and then
+ access it without using
+ either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<p>
In short, RCU's publish-subscribe guarantee is provided by the combination
@@ -571,17 +640,156 @@ systems with more than one CPU:
<tt>synchronize_rcu()</tt> migrates in the meantime.
</ol>
-<p><a name="Quick Quiz 5"><b>Quick Quiz 5</b>:</a>
-Given that multiple CPUs can start RCU read-side critical sections
-at any time without any ordering whatsoever, how can RCU possibly tell whether
-or not a given RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>?
-<br><a href="#qq5answer">Answer</a>
-
-<p><a name="Quick Quiz 6"><b>Quick Quiz 6</b>:</a>
-The first and second guarantees require unbelievably strict ordering!
-Are all these memory barriers <i> really</i> required?
-<br><a href="#qq6answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Given that multiple CPUs can start RCU read-side critical sections
+ at any time without any ordering whatsoever, how can RCU possibly
+ tell whether or not a given RCU read-side critical section starts
+ before a given instance of <tt>synchronize_rcu()</tt>?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ If RCU cannot tell whether or not a given
+ RCU read-side critical section starts before a
+ given instance of <tt>synchronize_rcu()</tt>,
+ then it must assume that the RCU read-side critical section
+ started first.
+ In other words, a given instance of <tt>synchronize_rcu()</tt>
+ can avoid waiting on a given RCU read-side critical section only
+ if it can prove that <tt>synchronize_rcu()</tt> started first.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ The first and second guarantees require unbelievably strict ordering!
+ Are all these memory barriers <i> really</i> required?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Yes, they really are required.
+ To see why the first guarantee is required, consider the following
+ sequence of events:
+ </font>
+
+ <ol>
+ <li> <font color="ffffff">
+ CPU 1: <tt>rcu_read_lock()</tt>
+ </font>
+ <li> <font color="ffffff">
+ CPU 1: <tt>q = rcu_dereference(gp);
+ /* Very likely to return p. */</tt>
+ </font>
+ <li> <font color="ffffff">
+ CPU 0: <tt>list_del_rcu(p);</tt>
+ </font>
+ <li> <font color="ffffff">
+ CPU 0: <tt>synchronize_rcu()</tt> starts.
+ </font>
+ <li> <font color="ffffff">
+ CPU 1: <tt>do_something_with(q-&gt;a);
+ /* No smp_mb(), so might happen after kfree(). */</tt>
+ </font>
+ <li> <font color="ffffff">
+ CPU 1: <tt>rcu_read_unlock()</tt>
+ </font>
+ <li> <font color="ffffff">
+ CPU 0: <tt>synchronize_rcu()</tt> returns.
+ </font>
+ <li> <font color="ffffff">
+ CPU 0: <tt>kfree(p);</tt>
+ </font>
+ </ol>
+
+ <p><font color="ffffff">
+ Therefore, there absolutely must be a full memory barrier between the
+ end of the RCU read-side critical section and the end of the
+ grace period.
+ </font>
+
+ <p><font color="ffffff">
+ The sequence of events demonstrating the necessity of the second guarantee
+ is roughly similar:
+ </font>
+
+ <ol>
+ <li> <font color="ffffff">CPU 0: <tt>list_del_rcu(p);</tt>
+ </font>
+ <li> <font color="ffffff">CPU 0: <tt>synchronize_rcu()</tt> starts.
+ </font>
+ <li> <font color="ffffff">CPU 1: <tt>rcu_read_lock()</tt>
+ </font>
+ <li> <font color="ffffff">CPU 1: <tt>q = rcu_dereference(gp);
+ /* Might return p if no memory barrier. */</tt>
+ </font>
+ <li> <font color="ffffff">CPU 0: <tt>synchronize_rcu()</tt> returns.
+ </font>
+ <li> <font color="ffffff">CPU 0: <tt>kfree(p);</tt>
+ </font>
+ <li> <font color="ffffff">
+ CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
+ </font>
+ <li> <font color="ffffff">CPU 1: <tt>rcu_read_unlock()</tt>
+ </font>
+ </ol>
+
+ <p><font color="ffffff">
+ And similarly, without a memory barrier between the beginning of the
+ grace period and the beginning of the RCU read-side critical section,
+ CPU&nbsp;1 might end up accessing the freelist.
+ </font>
+
+ <p><font color="ffffff">
+ The &ldquo;as if&rdquo; rule of course applies, so that any
+ implementation that acts as if the appropriate memory barriers
+ were in place is a correct implementation.
+ That said, it is much easier to fool yourself into believing
+ that you have adhered to the as-if rule than it is to actually
+ adhere to it!
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ You claim that <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
+ generate absolutely no code in some kernel builds.
+ This means that the compiler might arbitrarily rearrange consecutive
+ RCU read-side critical sections.
+ Given such rearrangement, if a given RCU read-side critical section
+ is done, how can you be sure that all prior RCU read-side critical
+ sections are done?
+ Won't the compiler rearrangements make that impossible to determine?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ In cases where <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
+ generate absolutely no code, RCU infers quiescent states only at
+ special locations, for example, within the scheduler.
+ Because calls to <tt>schedule()</tt> had better prevent calling-code
+ accesses to shared variables from being rearranged across the call to
+ <tt>schedule()</tt>, if RCU detects the end of a given RCU read-side
+ critical section, it will necessarily detect the end of all prior
+ RCU read-side critical sections, no matter how aggressively the
+ compiler scrambles the code.
+ </font>
+
+ <p><font color="ffffff">
+ Again, this all assumes that the compiler cannot scramble code across
+ calls to the scheduler, out of interrupt handlers, into the idle loop,
+ into user-mode code, and so on.
+ But if your kernel build allows that sort of scrambling, you have broken
+ far more than just RCU!
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<p>
Note that these memory-barrier requirements do not replace the fundamental
@@ -626,9 +834,19 @@ inconvenience can be avoided through use of the
<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
described later in this document.
-<p><a name="Quick Quiz 7"><b>Quick Quiz 7</b>:</a>
-But how does the upgrade-to-write operation exclude other readers?
-<br><a href="#qq7answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ But how does the upgrade-to-write operation exclude other readers?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ It doesn't, just like normal RCU updates, which also do not exclude
+ RCU readers.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<p>
This guarantee allows lookup code to be shared between read-side
@@ -714,9 +932,20 @@ to do significant reordering.
This is by design: Any significant ordering constraints would slow down
these fast-path APIs.
-<p><a name="Quick Quiz 8"><b>Quick Quiz 8</b>:</a>
-Can't the compiler also reorder this code?
-<br><a href="#qq8answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Can't the compiler also reorder this code?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ No, the volatile casts in <tt>READ_ONCE()</tt> and
+ <tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
+ this particular case.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
@@ -769,10 +998,28 @@ new readers can start immediately after <tt>synchronize_rcu()</tt>
starts, and <tt>synchronize_rcu()</tt> is under no
obligation to wait for these new readers.
-<p><a name="Quick Quiz 9"><b>Quick Quiz 9</b>:</a>
-Suppose that synchronize_rcu() did wait until all readers had completed.
-Would the updater be able to rely on this?
-<br><a href="#qq9answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Suppose that <tt>synchronize_rcu()</tt> did wait until <i>all</i>
+ readers had completed instead of waiting only on
+ pre-existing readers.
+ For how long would the updater be able to rely on there
+ being no readers?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ For no time at all.
+ Even if <tt>synchronize_rcu()</tt> were to wait until
+ all readers had completed, a new reader might start immediately after
+ <tt>synchronize_rcu()</tt> completed.
+ Therefore, the code following
+ <tt>synchronize_rcu()</tt> can <i>never</i> rely on there being
+ no readers.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
@@ -969,11 +1216,24 @@ grace period.
As a result, an RCU read-side critical section cannot partition a pair
of RCU grace periods.
-<p><a name="Quick Quiz 10"><b>Quick Quiz 10</b>:</a>
-How long a sequence of grace periods, each separated by an RCU read-side
-critical section, would be required to partition the RCU read-side
-critical sections at the beginning and end of the chain?
-<br><a href="#qq10answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ How long a sequence of grace periods, each separated by an RCU
+ read-side critical section, would be required to partition the RCU
+ read-side critical sections at the beginning and end of the chain?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ In theory, an infinite number.
+ In practice, an unknown number that is sensitive to both implementation
+ details and timing considerations.
+ Therefore, even in practice, RCU users must abide by the
+ theoretical rather than the practical answer.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<h3><a name="Disabling Preemption Does Not Block Grace Periods">
Disabling Preemption Does Not Block Grace Periods</a></h3>
@@ -1109,12 +1369,27 @@ These classes is covered in the following sections.
<h3><a name="Specialization">Specialization</a></h3>
<p>
-RCU is and always has been intended primarily for read-mostly situations, as
-illustrated by the following figure.
-This means that RCU's read-side primitives are optimized, often at the
+RCU is and always has been intended primarily for read-mostly situations,
+which means that RCU's read-side primitives are optimized, often at the
expense of its update-side primitives.
+Experience thus far is captured by the following list of situations:
-<p><img src="RCUApplicability.svg" alt="RCUApplicability.svg" width="70%"></p>
+<ol>
+<li> Read-mostly data, where stale and inconsistent data is not
+ a problem: RCU works great!
+<li> Read-mostly data, where data must be consistent:
+ RCU works well.
+<li> Read-write data, where data must be consistent:
+ RCU <i>might</i> work OK.
+ Or not.
+<li> Write-mostly data, where data must be consistent:
+ RCU is very unlikely to be the right tool for the job,
+ with the following exceptions, where RCU can provide:
+ <ol type=a>
+ <li> Existence guarantees for update-friendly mechanisms.
+ <li> Wait-free read-side primitives for real-time use.
+ </ol>
+</ol>
<p>
This focus on read-mostly situations means that RCU must interoperate
@@ -1127,9 +1402,43 @@ synchronization primitives be legal within RCU read-side critical sections,
including spinlocks, sequence locks, atomic operations, reference
counters, and memory barriers.
-<p><a name="Quick Quiz 11"><b>Quick Quiz 11</b>:</a>
-What about sleeping locks?
-<br><a href="#qq11answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ What about sleeping locks?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ These are forbidden within Linux-kernel RCU read-side critical
+ sections because it is not legal to place a quiescent state
+ (in this case, voluntary context switch) within an RCU read-side
+ critical section.
+ However, sleeping locks may be used within userspace RCU read-side
+ critical sections, and also within Linux-kernel sleepable RCU
+ <a href="#Sleepable RCU"><font color="ffffff">(SRCU)</font></a>
+ read-side critical sections.
+ In addition, the -rt patchset turns spinlocks into
+ sleeping locks so that the corresponding critical sections
+ can be preempted, which also means that these sleeplockified
+ spinlocks (but not other sleeping locks!) may be acquired within
+ -rt-Linux-kernel RCU read-side critical sections.
+ </font>
+
+ <p><font color="ffffff">
+ Note that it <i>is</i> legal for a normal RCU read-side
+ critical section to conditionally acquire a sleeping lock
+ (as in <tt>mutex_trylock()</tt>), but only as long as it does
+ not loop indefinitely attempting to conditionally acquire that
+ sleeping lock.
+ The key point is that things like <tt>mutex_trylock()</tt>
+ either return with the mutex held, or return an error indication if
+ the mutex was not immediately available.
+ Either way, <tt>mutex_trylock()</tt> returns immediately without
+ sleeping.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<p>
It often comes as a surprise that many algorithms do not require a
@@ -1160,10 +1469,7 @@ some period of time, so the exact wait period is a judgment call.
One of our pair of veterinarians might wait 30 seconds before pronouncing
the cat dead, while the other might insist on waiting a full minute.
The two veterinarians would then disagree on the state of the cat during
-the final 30 seconds of the minute following the last heartbeat, as
-fancifully illustrated below:
-
-<p><img src="2013-08-is-it-dead.png" alt="2013-08-is-it-dead.png" width="431"></p>
+the final 30 seconds of the minute following the last heartbeat.
<p>
Interestingly enough, this same situation applies to hardware.
@@ -1343,7 +1649,8 @@ situations where neither <tt>synchronize_rcu()</tt> nor
<tt>synchronize_rcu_expedited()</tt> would be legal,
including within preempt-disable code, <tt>local_bh_disable()</tt> code,
interrupt-disable code, and interrupt handlers.
-However, even <tt>call_rcu()</tt> is illegal within NMI handlers.
+However, even <tt>call_rcu()</tt> is illegal within NMI handlers
+and from idle and offline CPUs.
The callback function (<tt>remove_gp_cb()</tt> in this case) will be
executed within softirq (software interrupt) environment within the
Linux kernel,
@@ -1354,12 +1661,27 @@ write an RCU callback function that takes too long.
Long-running operations should be relegated to separate threads or
(in the Linux kernel) workqueues.
-<p><a name="Quick Quiz 12"><b>Quick Quiz 12</b>:</a>
-Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
-After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
-structure, which would interact badly with concurrent insertions.
-Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-<br><a href="#qq12answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
+ After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
+ structure, which would interact badly with concurrent insertions.
+ Doesn't this mean that <tt>rcu_dereference()</tt> is required?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
+ any changes, including any insertions that <tt>rcu_dereference()</tt>
+ would protect against.
+ Therefore, any insertions will be delayed until after
+ <tt>-&gt;gp_lock</tt>
+ is released on line&nbsp;25, which in turn means that
+ <tt>rcu_access_pointer()</tt> suffices.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<p>
However, all that <tt>remove_gp_cb()</tt> is doing is
@@ -1406,14 +1728,31 @@ This was due to the fact that RCU was not heavily used within DYNIX/ptx,
so the very few places that needed something like
<tt>synchronize_rcu()</tt> simply open-coded it.
-<p><a name="Quick Quiz 13"><b>Quick Quiz 13</b>:</a>
-Earlier it was claimed that <tt>call_rcu()</tt> and
-<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-by readers.
-But how can that be correct, given that the invocation of the callback
-and the freeing of the memory (respectively) must still wait for
-a grace period to elapse?
-<br><a href="#qq13answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ Earlier it was claimed that <tt>call_rcu()</tt> and
+ <tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
+ by readers.
+ But how can that be correct, given that the invocation of the callback
+ and the freeing of the memory (respectively) must still wait for
+ a grace period to elapse?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ We could define things this way, but keep in mind that this sort of
+ definition would say that updates in garbage-collected languages
+ cannot complete until the next time the garbage collector runs,
+ which does not seem at all reasonable.
+ The key point is that in most cases, an updater using either
+ <tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
+ next update as soon as it has invoked <tt>call_rcu()</tt> or
+ <tt>kfree_rcu()</tt>, without having to wait for a subsequent
+ grace period.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<p>
But what if the updater must wait for the completion of code to be
@@ -1838,11 +2177,26 @@ kthreads to be spawned.
Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
initialization can result in deadlock.
-<p><a name="Quick Quiz 14"><b>Quick Quiz 14</b>:</a>
-So what happens with <tt>synchronize_rcu()</tt> during
-scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-kernels?
-<br><a href="#qq14answer">Answer</a>
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ So what happens with <tt>synchronize_rcu()</tt> during
+ scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
+ kernels?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ In <tt>CONFIG_PREEMPT=n</tt> kernels, <tt>synchronize_rcu()</tt>
+ maps directly to <tt>synchronize_sched()</tt>.
+ Therefore, <tt>synchronize_rcu()</tt> works normally throughout
+ boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
+ However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
+ so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
+ during scheduler initialization.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
<p>
I learned of these boot-time requirements as a result of a series of
@@ -2171,6 +2525,14 @@ This real-time requirement motivated the grace-period kthread, which
also simplified handling of a number of race conditions.
<p>
+RCU must avoid degrading real-time response for CPU-bound threads, whether
+executing in usermode (which is one use case for
+<tt>CONFIG_NO_HZ_FULL=y</tt>) or in the kernel.
+That said, CPU-bound loops in the kernel must execute
+<tt>cond_resched_rcu_qs()</tt> at least once per few tens of milliseconds
+in order to avoid receiving an IPI from RCU.
+
+<p>
Finally, RCU's status as a synchronization primitive means that
any RCU failure can result in arbitrary memory corruption that can be
extremely difficult to debug.
@@ -2223,6 +2585,8 @@ described in a separate section.
<li> <a href="#Sched Flavor">Sched Flavor</a>
<li> <a href="#Sleepable RCU">Sleepable RCU</a>
<li> <a href="#Tasks RCU">Tasks RCU</a>
+<li> <a href="#Waiting for Multiple Grace Periods">
+ Waiting for Multiple Grace Periods</a>
</ol>
<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
@@ -2472,6 +2836,94 @@ The tasks-RCU API is quite compact, consisting only of
<tt>synchronize_rcu_tasks()</tt>, and
<tt>rcu_barrier_tasks()</tt>.
+<h3><a name="Waiting for Multiple Grace Periods">
+Waiting for Multiple Grace Periods</a></h3>
+
+<p>
+Perhaps you have an RCU-protected data structure that is accessed from
+RCU read-side critical sections, from softirq handlers, and from
+hardware interrupt handlers.
+That is three flavors of RCU, the normal flavor, the bottom-half flavor,
+and the sched flavor.
+How do you wait for a compound grace period?
+
+<p>
+The best approach is usually to &ldquo;just say no!&rdquo; and
+insert <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>
+around each RCU read-side critical section, regardless of what
+environment it happens to be in.
+But suppose that some of the RCU read-side critical sections are
+on extremely hot code paths, and that use of <tt>CONFIG_PREEMPT=n</tt>
+is not a viable option, so that <tt>rcu_read_lock()</tt> and
+<tt>rcu_read_unlock()</tt> are not free.
+What then?
+
+<p>
+You <i>could</i> wait on all three grace periods in succession, as follows:
+
+<blockquote>
+<pre>
+ 1 synchronize_rcu();
+ 2 synchronize_rcu_bh();
+ 3 synchronize_sched();
+</pre>
+</blockquote>
+
+<p>
+This works, but triples the update-side latency penalty.
+In cases where this is not acceptable, <tt>synchronize_rcu_mult()</tt>
+may be used to wait on all three flavors of grace period concurrently:
+
+<blockquote>
+<pre>
+ 1 synchronize_rcu_mult(call_rcu, call_rcu_bh, call_rcu_sched);
+</pre>
+</blockquote>
+
+<p>
+But what if it is necessary to also wait on SRCU?
+This can be done as follows:
+
+<blockquote>
+<pre>
+ 1 static void call_my_srcu(struct rcu_head *head,
+ 2 void (*func)(struct rcu_head *head))
+ 3 {
+ 4 call_srcu(&amp;my_srcu, head, func);
+ 5 }
+ 6
+ 7 synchronize_rcu_mult(call_rcu, call_rcu_bh, call_rcu_sched, call_my_srcu);
+</pre>
+</blockquote>
+
+<p>
+If you needed to wait on multiple different flavors of SRCU
+(but why???), you would need to create a wrapper function resembling
+<tt>call_my_srcu()</tt> for each SRCU flavor.
+
+<table>
+<tr><th>&nbsp;</th></tr>
+<tr><th align="left">Quick Quiz:</th></tr>
+<tr><td>
+ But what if I need to wait for multiple RCU flavors, but I also need
+ the grace periods to be expedited?
+</td></tr>
+<tr><th align="left">Answer:</th></tr>
+<tr><td bgcolor="#ffffff"><font color="ffffff">
+ If you are using expedited grace periods, there should be less penalty
+ for waiting on them in succession.
+ But if that is nevertheless a problem, you can use workqueues
+ or multiple kthreads to wait on the various expedited grace
+ periods concurrently.
+</font></td></tr>
+<tr><td>&nbsp;</td></tr>
+</table>
+
+<p>
+Again, it is usually better to adjust the RCU read-side critical sections
+to use a single flavor of RCU, but when this is not feasible, you can use
+<tt>synchronize_rcu_mult()</tt>.
+
<h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
<p>
@@ -2569,329 +3021,4 @@ and is provided
under the terms of the Creative Commons Attribution-Share Alike 3.0
United States license.
-<h3><a name="Answers to Quick Quizzes">
-Answers to Quick Quizzes</a></h3>
-
-<a name="qq1answer"></a>
-<p><b>Quick Quiz 1</b>:
-Wait a minute!
-You said that updaters can make useful forward progress concurrently
-with readers, but pre-existing readers will block
-<tt>synchronize_rcu()</tt>!!!
-Just who are you trying to fool???
-
-
-</p><p><b>Answer</b>:
-First, if updaters do not wish to be blocked by readers, they can use
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
-be discussed later.
-Second, even when using <tt>synchronize_rcu()</tt>, the other
-update-side code does run concurrently with readers, whether pre-existing
-or not.
-
-
-</p><p><a href="#Quick%20Quiz%201"><b>Back to Quick Quiz 1</b>.</a>
-
-<a name="qq2answer"></a>
-<p><b>Quick Quiz 2</b>:
-Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
-
-
-</p><p><b>Answer</b>:
-Without that extra grace period, memory reordering could result in
-<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
-concurrently with the last bits of <tt>recovery()</tt>.
-
-
-</p><p><a href="#Quick%20Quiz%202"><b>Back to Quick Quiz 2</b>.</a>
-
-<a name="qq3answer"></a>
-<p><b>Quick Quiz 3</b>:
-But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
-from being reordered.
-Can't that also cause problems?
-
-
-</p><p><b>Answer</b>:
-No, it cannot.
-The readers cannot see either of these two fields until
-the assignment to <tt>gp</tt>, by which time both fields are
-fully initialized.
-So reordering the assignments
-to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
-cause any problems.
-
-
-</p><p><a href="#Quick%20Quiz%203"><b>Back to Quick Quiz 3</b>.</a>
-
-<a name="qq4answer"></a>
-<p><b>Quick Quiz 4</b>:
-Without the <tt>rcu_dereference()</tt> or the
-<tt>rcu_access_pointer()</tt>, what destructive optimizations
-might the compiler make use of?
-
-
-</p><p><b>Answer</b>:
-Let's start with what happens to <tt>do_something_gp()</tt>
-if it fails to use <tt>rcu_dereference()</tt>.
-It could reuse a value formerly fetched from this same pointer.
-It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
-manner, resulting in <i>load tearing</i>, in turn resulting a bytewise
-mash-up of two distince pointer values.
-It might even use value-speculation optimizations, where it makes a wrong
-guess, but by the time it gets around to checking the value, an update
-has changed the pointer to match the wrong guess.
-Too bad about any dereferences that returned pre-initialization garbage
-in the meantime!
-
-<p>
-For <tt>remove_gp_synchronous()</tt>, as long as all modifications
-to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
-the above optimizations are harmless.
-However,
-with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
-<tt>sparse</tt> will complain if you
-define <tt>gp</tt> with <tt>__rcu</tt> and then
-access it without using
-either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
-
-
-</p><p><a href="#Quick%20Quiz%204"><b>Back to Quick Quiz 4</b>.</a>
-
-<a name="qq5answer"></a>
-<p><b>Quick Quiz 5</b>:
-Given that multiple CPUs can start RCU read-side critical sections
-at any time without any ordering whatsoever, how can RCU possibly tell whether
-or not a given RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>?
-
-
-</p><p><b>Answer</b>:
-If RCU cannot tell whether or not a given
-RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>,
-then it must assume that the RCU read-side critical section
-started first.
-In other words, a given instance of <tt>synchronize_rcu()</tt>
-can avoid waiting on a given RCU read-side critical section only
-if it can prove that <tt>synchronize_rcu()</tt> started first.
-
-
-</p><p><a href="#Quick%20Quiz%205"><b>Back to Quick Quiz 5</b>.</a>
-
-<a name="qq6answer"></a>
-<p><b>Quick Quiz 6</b>:
-The first and second guarantees require unbelievably strict ordering!
-Are all these memory barriers <i> really</i> required?
-
-
-</p><p><b>Answer</b>:
-Yes, they really are required.
-To see why the first guarantee is required, consider the following
-sequence of events:
-
-<ol>
-<li> CPU 1: <tt>rcu_read_lock()</tt>
-<li> CPU 1: <tt>q = rcu_dereference(gp);
- /* Very likely to return p. */</tt>
-<li> CPU 0: <tt>list_del_rcu(p);</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li> CPU 1: <tt>do_something_with(q-&gt;a);
- /* No smp_mb(), so might happen after kfree(). */</tt>
-<li> CPU 1: <tt>rcu_read_unlock()</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li> CPU 0: <tt>kfree(p);</tt>
-</ol>
-
-<p>
-Therefore, there absolutely must be a full memory barrier between the
-end of the RCU read-side critical section and the end of the
-grace period.
-
-<p>
-The sequence of events demonstrating the necessity of the second rule
-is roughly similar:
-
-<ol>
-<li> CPU 0: <tt>list_del_rcu(p);</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li> CPU 1: <tt>rcu_read_lock()</tt>
-<li> CPU 1: <tt>q = rcu_dereference(gp);
- /* Might return p if no memory barrier. */</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li> CPU 0: <tt>kfree(p);</tt>
-<li> CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
-<li> CPU 1: <tt>rcu_read_unlock()</tt>
-</ol>
-
-<p>
-And similarly, without a memory barrier between the beginning of the
-grace period and the beginning of the RCU read-side critical section,
-CPU&nbsp;1 might end up accessing the freelist.
-
-<p>
-The &ldquo;as if&rdquo; rule of course applies, so that any implementation
-that acts as if the appropriate memory barriers were in place is a
-correct implementation.
-That said, it is much easier to fool yourself into believing that you have
-adhered to the as-if rule than it is to actually adhere to it!
-
-
-</p><p><a href="#Quick%20Quiz%206"><b>Back to Quick Quiz 6</b>.</a>
-
-<a name="qq7answer"></a>
-<p><b>Quick Quiz 7</b>:
-But how does the upgrade-to-write operation exclude other readers?
-
-
-</p><p><b>Answer</b>:
-It doesn't, just like normal RCU updates, which also do not exclude
-RCU readers.
-
-
-</p><p><a href="#Quick%20Quiz%207"><b>Back to Quick Quiz 7</b>.</a>
-
-<a name="qq8answer"></a>
-<p><b>Quick Quiz 8</b>:
-Can't the compiler also reorder this code?
-
-
-</p><p><b>Answer</b>:
-No, the volatile casts in <tt>READ_ONCE()</tt> and
-<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
-this particular case.
-
-
-</p><p><a href="#Quick%20Quiz%208"><b>Back to Quick Quiz 8</b>.</a>
-
-<a name="qq9answer"></a>
-<p><b>Quick Quiz 9</b>:
-Suppose that synchronize_rcu() did wait until all readers had completed.
-Would the updater be able to rely on this?
-
-
-</p><p><b>Answer</b>:
-No.
-Even if <tt>synchronize_rcu()</tt> were to wait until
-all readers had completed, a new reader might start immediately after
-<tt>synchronize_rcu()</tt> completed.
-Therefore, the code following
-<tt>synchronize_rcu()</tt> cannot rely on there being no readers
-in any case.
-
-
-</p><p><a href="#Quick%20Quiz%209"><b>Back to Quick Quiz 9</b>.</a>
-
-<a name="qq10answer"></a>
-<p><b>Quick Quiz 10</b>:
-How long a sequence of grace periods, each separated by an RCU read-side
-critical section, would be required to partition the RCU read-side
-critical sections at the beginning and end of the chain?
-
-
-</p><p><b>Answer</b>:
-In theory, an infinite number.
-In practice, an unknown number that is sensitive to both implementation
-details and timing considerations.
-Therefore, even in practice, RCU users must abide by the theoretical rather
-than the practical answer.
-
-
-</p><p><a href="#Quick%20Quiz%2010"><b>Back to Quick Quiz 10</b>.</a>
-
-<a name="qq11answer"></a>
-<p><b>Quick Quiz 11</b>:
-What about sleeping locks?
-
-
-</p><p><b>Answer</b>:
-These are forbidden within Linux-kernel RCU read-side critical sections
-because it is not legal to place a quiescent state (in this case,
-voluntary context switch) within an RCU read-side critical section.
-However, sleeping locks may be used within userspace RCU read-side critical
-sections, and also within Linux-kernel sleepable RCU
-<a href="#Sleepable RCU">(SRCU)</a>
-read-side critical sections.
-In addition, the -rt patchset turns spinlocks into a sleeping locks so
-that the corresponding critical sections can be preempted, which
-also means that these sleeplockified spinlocks (but not other sleeping locks!)
-may be acquire within -rt-Linux-kernel RCU read-side critical sections.
-
-<p>
-Note that it <i>is</i> legal for a normal RCU read-side critical section
-to conditionally acquire a sleeping locks (as in <tt>mutex_trylock()</tt>),
-but only as long as it does not loop indefinitely attempting to
-conditionally acquire that sleeping locks.
-The key point is that things like <tt>mutex_trylock()</tt>
-either return with the mutex held, or return an error indication if
-the mutex was not immediately available.
-Either way, <tt>mutex_trylock()</tt> returns immediately without sleeping.
-
-
-</p><p><a href="#Quick%20Quiz%2011"><b>Back to Quick Quiz 11</b>.</a>
-
-<a name="qq12answer"></a>
-<p><b>Quick Quiz 12</b>:
-Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
-After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
-structure, which would interact badly with concurrent insertions.
-Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-
-
-</p><p><b>Answer</b>:
-Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
-any changes, including any insertions that <tt>rcu_dereference()</tt>
-would protect against.
-Therefore, any insertions will be delayed until after <tt>-&gt;gp_lock</tt>
-is released on line&nbsp;25, which in turn means that
-<tt>rcu_access_pointer()</tt> suffices.
-
-
-</p><p><a href="#Quick%20Quiz%2012"><b>Back to Quick Quiz 12</b>.</a>
-
-<a name="qq13answer"></a>
-<p><b>Quick Quiz 13</b>:
-Earlier it was claimed that <tt>call_rcu()</tt> and
-<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-by readers.
-But how can that be correct, given that the invocation of the callback
-and the freeing of the memory (respectively) must still wait for
-a grace period to elapse?
-
-
-</p><p><b>Answer</b>:
-We could define things this way, but keep in mind that this sort of
-definition would say that updates in garbage-collected languages
-cannot complete until the next time the garbage collector runs,
-which does not seem at all reasonable.
-The key point is that in most cases, an updater using either
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
-next update as soon as it has invoked <tt>call_rcu()</tt> or
-<tt>kfree_rcu()</tt>, without having to wait for a subsequent
-grace period.
-
-
-</p><p><a href="#Quick%20Quiz%2013"><b>Back to Quick Quiz 13</b>.</a>
-
-<a name="qq14answer"></a>
-<p><b>Quick Quiz 14</b>:
-So what happens with <tt>synchronize_rcu()</tt> during
-scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-kernels?
-
-
-</p><p><b>Answer</b>:
-In <tt>CONFIG_PREEMPT=n</tt> kernel, <tt>synchronize_rcu()</tt>
-maps directly to <tt>synchronize_sched()</tt>.
-Therefore, <tt>synchronize_rcu()</tt> works normally throughout
-boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
-However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
-so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
-during scheduler initialization.
-
-
-</p><p><a href="#Quick%20Quiz%2014"><b>Back to Quick Quiz 14</b>.</a>
-
-
</body></html>
diff --git a/Documentation/RCU/Design/Requirements/Requirements.htmlx b/Documentation/RCU/Design/Requirements/Requirements.htmlx
deleted file mode 100644
index 3a97ba490c42..000000000000
--- a/Documentation/RCU/Design/Requirements/Requirements.htmlx
+++ /dev/null
@@ -1,2741 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
- "http://www.w3.org/TR/html4/loose.dtd">
- <html>
- <head><title>A Tour Through RCU's Requirements [LWN.net]</title>
- <meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
-
-<h1>A Tour Through RCU's Requirements</h1>
-
-<p>Copyright IBM Corporation, 2015</p>
-<p>Author: Paul E.&nbsp;McKenney</p>
-<p><i>The initial version of this document appeared in the
-<a href="https://lwn.net/">LWN</a> articles
-<a href="https://lwn.net/Articles/652156/">here</a>,
-<a href="https://lwn.net/Articles/652677/">here</a>, and
-<a href="https://lwn.net/Articles/653326/">here</a>.</i></p>
-
-<h2>Introduction</h2>
-
-<p>
-Read-copy update (RCU) is a synchronization mechanism that is often
-used as a replacement for reader-writer locking.
-RCU is unusual in that updaters do not block readers,
-which means that RCU's read-side primitives can be exceedingly fast
-and scalable.
-In addition, updaters can make useful forward progress concurrently
-with readers.
-However, all this concurrency between RCU readers and updaters does raise
-the question of exactly what RCU readers are doing, which in turn
-raises the question of exactly what RCU's requirements are.
-
-<p>
-This document therefore summarizes RCU's requirements, and can be thought
-of as an informal, high-level specification for RCU.
-It is important to understand that RCU's specification is primarily
-empirical in nature;
-in fact, I learned about many of these requirements the hard way.
-This situation might cause some consternation; however, not only
-has this learning process been a lot of fun, but it has also been
-a great privilege to work with so many people willing to apply
-technologies in interesting new ways.
-
-<p>
-All that aside, here are the categories of currently known RCU requirements:
-</p>
-
-<ol>
-<li> <a href="#Fundamental Requirements">
- Fundamental Requirements</a>
-<li> <a href="#Fundamental Non-Requirements">Fundamental Non-Requirements</a>
-<li> <a href="#Parallelism Facts of Life">
- Parallelism Facts of Life</a>
-<li> <a href="#Quality-of-Implementation Requirements">
- Quality-of-Implementation Requirements</a>
-<li> <a href="#Linux Kernel Complications">
- Linux Kernel Complications</a>
-<li> <a href="#Software-Engineering Requirements">
- Software-Engineering Requirements</a>
-<li> <a href="#Other RCU Flavors">
- Other RCU Flavors</a>
-<li> <a href="#Possible Future Changes">
- Possible Future Changes</a>
-</ol>
-
-<p>
-This is followed by a <a href="#Summary">summary</a>,
-which is in turn followed by the inevitable
-<a href="#Answers to Quick Quizzes">answers to the quick quizzes</a>.
-
-<h2><a name="Fundamental Requirements">Fundamental Requirements</a></h2>
-
-<p>
-RCU's fundamental requirements are the closest thing RCU has to hard
-mathematical requirements.
-These are:
-
-<ol>
-<li> <a href="#Grace-Period Guarantee">
- Grace-Period Guarantee</a>
-<li> <a href="#Publish-Subscribe Guarantee">
- Publish-Subscribe Guarantee</a>
-<li> <a href="#Memory-Barrier Guarantees">
- Memory-Barrier Guarantees</a>
-<li> <a href="#RCU Primitives Guaranteed to Execute Unconditionally">
- RCU Primitives Guaranteed to Execute Unconditionally</a>
-<li> <a href="#Guaranteed Read-to-Write Upgrade">
- Guaranteed Read-to-Write Upgrade</a>
-</ol>
-
-<h3><a name="Grace-Period Guarantee">Grace-Period Guarantee</a></h3>
-
-<p>
-RCU's grace-period guarantee is unusual in being premeditated:
-Jack Slingwine and I had this guarantee firmly in mind when we started
-work on RCU (then called &ldquo;rclock&rdquo;) in the early 1990s.
-That said, the past two decades of experience with RCU have produced
-a much more detailed understanding of this guarantee.
-
-<p>
-RCU's grace-period guarantee allows updaters to wait for the completion
-of all pre-existing RCU read-side critical sections.
-An RCU read-side critical section
-begins with the marker <tt>rcu_read_lock()</tt> and ends with
-the marker <tt>rcu_read_unlock()</tt>.
-These markers may be nested, and RCU treats a nested set as one
-big RCU read-side critical section.
-Production-quality implementations of <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> are extremely lightweight, and in
-fact have exactly zero overhead in Linux kernels built for production
-use with <tt>CONFIG_PREEMPT=n</tt>.
-
-<p>
-This guarantee allows ordering to be enforced with extremely low
-overhead to readers, for example:
-
-<blockquote>
-<pre>
- 1 int x, y;
- 2
- 3 void thread0(void)
- 4 {
- 5 rcu_read_lock();
- 6 r1 = READ_ONCE(x);
- 7 r2 = READ_ONCE(y);
- 8 rcu_read_unlock();
- 9 }
-10
-11 void thread1(void)
-12 {
-13 WRITE_ONCE(x, 1);
-14 synchronize_rcu();
-15 WRITE_ONCE(y, 1);
-16 }
-</pre>
-</blockquote>
-
-<p>
-Because the <tt>synchronize_rcu()</tt> on line&nbsp;14 waits for
-all pre-existing readers, any instance of <tt>thread0()</tt> that
-loads a value of zero from <tt>x</tt> must complete before
-<tt>thread1()</tt> stores to <tt>y</tt>, so that instance must
-also load a value of zero from <tt>y</tt>.
-Similarly, any instance of <tt>thread0()</tt> that loads a value of
-one from <tt>y</tt> must have started after the
-<tt>synchronize_rcu()</tt> started, and must therefore also load
-a value of one from <tt>x</tt>.
-Therefore, the outcome:
-<blockquote>
-<pre>
-(r1 == 0 &amp;&amp; r2 == 1)
-</pre>
-</blockquote>
-cannot happen.
-
-<p>@@QQ@@
-Wait a minute!
-You said that updaters can make useful forward progress concurrently
-with readers, but pre-existing readers will block
-<tt>synchronize_rcu()</tt>!!!
-Just who are you trying to fool???
-<p>@@QQA@@
-First, if updaters do not wish to be blocked by readers, they can use
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt>, which will
-be discussed later.
-Second, even when using <tt>synchronize_rcu()</tt>, the other
-update-side code does run concurrently with readers, whether pre-existing
-or not.
-<p>@@QQE@@
-
-<p>
-This scenario resembles one of the first uses of RCU in
-<a href="https://en.wikipedia.org/wiki/DYNIX">DYNIX/ptx</a>,
-which managed a distributed lock manager's transition into
-a state suitable for handling recovery from node failure,
-more or less as follows:
-
-<blockquote>
-<pre>
- 1 #define STATE_NORMAL 0
- 2 #define STATE_WANT_RECOVERY 1
- 3 #define STATE_RECOVERING 2
- 4 #define STATE_WANT_NORMAL 3
- 5
- 6 int state = STATE_NORMAL;
- 7
- 8 void do_something_dlm(void)
- 9 {
-10 int state_snap;
-11
-12 rcu_read_lock();
-13 state_snap = READ_ONCE(state);
-14 if (state_snap == STATE_NORMAL)
-15 do_something();
-16 else
-17 do_something_carefully();
-18 rcu_read_unlock();
-19 }
-20
-21 void start_recovery(void)
-22 {
-23 WRITE_ONCE(state, STATE_WANT_RECOVERY);
-24 synchronize_rcu();
-25 WRITE_ONCE(state, STATE_RECOVERING);
-26 recovery();
-27 WRITE_ONCE(state, STATE_WANT_NORMAL);
-28 synchronize_rcu();
-29 WRITE_ONCE(state, STATE_NORMAL);
-30 }
-</pre>
-</blockquote>
-
-<p>
-The RCU read-side critical section in <tt>do_something_dlm()</tt>
-works with the <tt>synchronize_rcu()</tt> in <tt>start_recovery()</tt>
-to guarantee that <tt>do_something()</tt> never runs concurrently
-with <tt>recovery()</tt>, but with little or no synchronization
-overhead in <tt>do_something_dlm()</tt>.
-
-<p>@@QQ@@
-Why is the <tt>synchronize_rcu()</tt> on line&nbsp;28 needed?
-<p>@@QQA@@
-Without that extra grace period, memory reordering could result in
-<tt>do_something_dlm()</tt> executing <tt>do_something()</tt>
-concurrently with the last bits of <tt>recovery()</tt>.
-<p>@@QQE@@
-
-<p>
-In order to avoid fatal problems such as deadlocks,
-an RCU read-side critical section must not contain calls to
-<tt>synchronize_rcu()</tt>.
-Similarly, an RCU read-side critical section must not
-contain anything that waits, directly or indirectly, on completion of
-an invocation of <tt>synchronize_rcu()</tt>.
-
-<p>
-Although RCU's grace-period guarantee is useful in and of itself, with
-<a href="https://lwn.net/Articles/573497/">quite a few use cases</a>,
-it would be good to be able to use RCU to coordinate read-side
-access to linked data structures.
-For this, the grace-period guarantee is not sufficient, as can
-be seen in function <tt>add_gp_buggy()</tt> below.
-We will look at the reader's code later, but in the meantime, just think of
-the reader as locklessly picking up the <tt>gp</tt> pointer,
-and, if the value loaded is non-<tt>NULL</tt>, locklessly accessing the
-<tt>-&gt;a</tt> and <tt>-&gt;b</tt> fields.
-
-<blockquote>
-<pre>
- 1 bool add_gp_buggy(int a, int b)
- 2 {
- 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4 if (!p)
- 5 return -ENOMEM;
- 6 spin_lock(&amp;gp_lock);
- 7 if (rcu_access_pointer(gp)) {
- 8 spin_unlock(&amp;gp_lock);
- 9 return false;
-10 }
-11 p-&gt;a = a;
-12 p-&gt;b = a;
-13 gp = p; /* ORDERING BUG */
-14 spin_unlock(&amp;gp_lock);
-15 return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-The problem is that both the compiler and weakly ordered CPUs are within
-their rights to reorder this code as follows:
-
-<blockquote>
-<pre>
- 1 bool add_gp_buggy_optimized(int a, int b)
- 2 {
- 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4 if (!p)
- 5 return -ENOMEM;
- 6 spin_lock(&amp;gp_lock);
- 7 if (rcu_access_pointer(gp)) {
- 8 spin_unlock(&amp;gp_lock);
- 9 return false;
-10 }
-<b>11 gp = p; /* ORDERING BUG */
-12 p-&gt;a = a;
-13 p-&gt;b = a;</b>
-14 spin_unlock(&amp;gp_lock);
-15 return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-If an RCU reader fetches <tt>gp</tt> just after
-<tt>add_gp_buggy_optimized</tt> executes line&nbsp;11,
-it will see garbage in the <tt>-&gt;a</tt> and <tt>-&gt;b</tt>
-fields.
-And this is but one of many ways in which compiler and hardware optimizations
-could cause trouble.
-Therefore, we clearly need some way to prevent the compiler and the CPU from
-reordering in this manner, which brings us to the publish-subscribe
-guarantee discussed in the next section.
-
-<h3><a name="Publish-Subscribe Guarantee">Publish-Subscribe Guarantee</a></h3>
-
-<p>
-RCU's publish-subscribe guarantee allows data to be inserted
-into a linked data structure without disrupting RCU readers.
-The updater uses <tt>rcu_assign_pointer()</tt> to insert the
-new data, and readers use <tt>rcu_dereference()</tt> to
-access data, whether new or old.
-The following shows an example of insertion:
-
-<blockquote>
-<pre>
- 1 bool add_gp(int a, int b)
- 2 {
- 3 p = kmalloc(sizeof(*p), GFP_KERNEL);
- 4 if (!p)
- 5 return -ENOMEM;
- 6 spin_lock(&amp;gp_lock);
- 7 if (rcu_access_pointer(gp)) {
- 8 spin_unlock(&amp;gp_lock);
- 9 return false;
-10 }
-11 p-&gt;a = a;
-12 p-&gt;b = a;
-13 rcu_assign_pointer(gp, p);
-14 spin_unlock(&amp;gp_lock);
-15 return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-The <tt>rcu_assign_pointer()</tt> on line&nbsp;13 is conceptually
-equivalent to a simple assignment statement, but also guarantees
-that its assignment will
-happen after the two assignments in lines&nbsp;11 and&nbsp;12,
-similar to the C11 <tt>memory_order_release</tt> store operation.
-It also prevents any number of &ldquo;interesting&rdquo; compiler
-optimizations, for example, the use of <tt>gp</tt> as a scratch
-location immediately preceding the assignment.
-
-<p>@@QQ@@
-But <tt>rcu_assign_pointer()</tt> does nothing to prevent the
-two assignments to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt>
-from being reordered.
-Can't that also cause problems?
-<p>@@QQA@@
-No, it cannot.
-The readers cannot see either of these two fields until
-the assignment to <tt>gp</tt>, by which time both fields are
-fully initialized.
-So reordering the assignments
-to <tt>p-&gt;a</tt> and <tt>p-&gt;b</tt> cannot possibly
-cause any problems.
-<p>@@QQE@@
-
-<p>
-It is tempting to assume that the reader need not do anything special
-to control its accesses to the RCU-protected data,
-as shown in <tt>do_something_gp_buggy()</tt> below:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp_buggy(void)
- 2 {
- 3 rcu_read_lock();
- 4 p = gp; /* OPTIMIZATIONS GALORE!!! */
- 5 if (p) {
- 6 do_something(p-&gt;a, p-&gt;b);
- 7 rcu_read_unlock();
- 8 return true;
- 9 }
-10 rcu_read_unlock();
-11 return false;
-12 }
-</pre>
-</blockquote>
-
-<p>
-However, this temptation must be resisted because there are a
-surprisingly large number of ways that the compiler
-(to say nothing of
-<a href="https://h71000.www7.hp.com/wizard/wiz_2637.html">DEC Alpha CPUs</a>)
-can trip this code up.
-For but one example, if the compiler were short of registers, it
-might choose to refetch from <tt>gp</tt> rather than keeping
-a separate copy in <tt>p</tt> as follows:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp_buggy_optimized(void)
- 2 {
- 3 rcu_read_lock();
- 4 if (gp) { /* OPTIMIZATIONS GALORE!!! */
-<b> 5 do_something(gp-&gt;a, gp-&gt;b);</b>
- 6 rcu_read_unlock();
- 7 return true;
- 8 }
- 9 rcu_read_unlock();
-10 return false;
-11 }
-</pre>
-</blockquote>
-
-<p>
-If this function ran concurrently with a series of updates that
-replaced the current structure with a new one,
-the fetches of <tt>gp-&gt;a</tt>
-and <tt>gp-&gt;b</tt> might well come from two different structures,
-which could cause serious confusion.
-To prevent this (and much else besides), <tt>do_something_gp()</tt> uses
-<tt>rcu_dereference()</tt> to fetch from <tt>gp</tt>:
-
-<blockquote>
-<pre>
- 1 bool do_something_gp(void)
- 2 {
- 3 rcu_read_lock();
- 4 p = rcu_dereference(gp);
- 5 if (p) {
- 6 do_something(p-&gt;a, p-&gt;b);
- 7 rcu_read_unlock();
- 8 return true;
- 9 }
-10 rcu_read_unlock();
-11 return false;
-12 }
-</pre>
-</blockquote>
-
-<p>
-The <tt>rcu_dereference()</tt> uses volatile casts and (for DEC Alpha)
-memory barriers in the Linux kernel.
-Should a
-<a href="http://www.rdrop.com/users/paulmck/RCU/consume.2015.07.13a.pdf">high-quality implementation of C11 <tt>memory_order_consume</tt> [PDF]</a>
-ever appear, then <tt>rcu_dereference()</tt> could be implemented
-as a <tt>memory_order_consume</tt> load.
-Regardless of the exact implementation, a pointer fetched by
-<tt>rcu_dereference()</tt> may not be used outside of the
-outermost RCU read-side critical section containing that
-<tt>rcu_dereference()</tt>, unless protection of
-the corresponding data element has been passed from RCU to some
-other synchronization mechanism, most commonly locking or
-<a href="https://www.kernel.org/doc/Documentation/RCU/rcuref.txt">reference counting</a>.
-
-<p>
-In short, updaters use <tt>rcu_assign_pointer()</tt> and readers
-use <tt>rcu_dereference()</tt>, and these two RCU API elements
-work together to ensure that readers have a consistent view of
-newly added data elements.
-
-<p>
-Of course, it is also necessary to remove elements from RCU-protected
-data structures, for example, using the following process:
-
-<ol>
-<li> Remove the data element from the enclosing structure.
-<li> Wait for all pre-existing RCU read-side critical sections
- to complete (because only pre-existing readers can possibly have
- a reference to the newly removed data element).
-<li> At this point, only the updater has a reference to the
- newly removed data element, so it can safely reclaim
- the data element, for example, by passing it to <tt>kfree()</tt>.
-</ol>
-
-This process is implemented by <tt>remove_gp_synchronous()</tt>:
-
-<blockquote>
-<pre>
- 1 bool remove_gp_synchronous(void)
- 2 {
- 3 struct foo *p;
- 4
- 5 spin_lock(&amp;gp_lock);
- 6 p = rcu_access_pointer(gp);
- 7 if (!p) {
- 8 spin_unlock(&amp;gp_lock);
- 9 return false;
-10 }
-11 rcu_assign_pointer(gp, NULL);
-12 spin_unlock(&amp;gp_lock);
-13 synchronize_rcu();
-14 kfree(p);
-15 return true;
-16 }
-</pre>
-</blockquote>
-
-<p>
-This function is straightforward, with line&nbsp;13 waiting for a grace
-period before line&nbsp;14 frees the old data element.
-This waiting ensures that readers will reach line&nbsp;7 of
-<tt>do_something_gp()</tt> before the data element referenced by
-<tt>p</tt> is freed.
-The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
-<tt>rcu_dereference()</tt>, except that:
-
-<ol>
-<li> The value returned by <tt>rcu_access_pointer()</tt>
- cannot be dereferenced.
- If you want to access the value pointed to as well as
- the pointer itself, use <tt>rcu_dereference()</tt>
- instead of <tt>rcu_access_pointer()</tt>.
-<li> The call to <tt>rcu_access_pointer()</tt> need not be
- protected.
- In contrast, <tt>rcu_dereference()</tt> must either be
- within an RCU read-side critical section or in a code
- segment where the pointer cannot change, for example, in
- code protected by the corresponding update-side lock.
-</ol>
-
-<p>@@QQ@@
-Without the <tt>rcu_dereference()</tt> or the
-<tt>rcu_access_pointer()</tt>, what destructive optimizations
-might the compiler make use of?
-<p>@@QQA@@
-Let's start with what happens to <tt>do_something_gp()</tt>
-if it fails to use <tt>rcu_dereference()</tt>.
-It could reuse a value formerly fetched from this same pointer.
-It could also fetch the pointer from <tt>gp</tt> in a byte-at-a-time
-manner, resulting in <i>load tearing</i>, in turn resulting in a bytewise
-mash-up of two distinct pointer values.
-It might even use value-speculation optimizations, where it makes a wrong
-guess, but by the time it gets around to checking the value, an update
-has changed the pointer to match the wrong guess.
-Too bad about any dereferences that returned pre-initialization garbage
-in the meantime!
-
-<p>
-For <tt>remove_gp_synchronous()</tt>, as long as all modifications
-to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
-the above optimizations are harmless.
-However,
-with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
-<tt>sparse</tt> will complain if you
-define <tt>gp</tt> with <tt>__rcu</tt> and then
-access it without using
-either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
-<p>@@QQE@@
-
-<p>
-In short, RCU's publish-subscribe guarantee is provided by the combination
-of <tt>rcu_assign_pointer()</tt> and <tt>rcu_dereference()</tt>.
-This guarantee allows data elements to be safely added to RCU-protected
-linked data structures without disrupting RCU readers.
-This guarantee can be used in combination with the grace-period
-guarantee to also allow data elements to be removed from RCU-protected
-linked data structures, again without disrupting RCU readers.
-
-<p>
-This guarantee was only partially premeditated.
-DYNIX/ptx used an explicit memory barrier for publication, but had nothing
-resembling <tt>rcu_dereference()</tt> for subscription, nor did it
-have anything resembling the <tt>smp_read_barrier_depends()</tt>
-that was later subsumed into <tt>rcu_dereference()</tt>.
-The need for these operations made itself known quite suddenly at a
-late-1990s meeting with the DEC Alpha architects, back in the days when
-DEC was still a free-standing company.
-It took the Alpha architects a good hour to convince me that any sort
-of barrier would ever be needed, and it then took me a good <i>two</i> hours
-to convince them that their documentation did not make this point clear.
-More recent work with the C and C++ standards committees has provided
-much education on tricks and traps from the compiler.
-In short, compilers were much less tricky in the early 1990s, but in
-2015, don't even think about omitting <tt>rcu_dereference()</tt>!
-
-<h3><a name="Memory-Barrier Guarantees">Memory-Barrier Guarantees</a></h3>
-
-<p>
-The previous section's simple linked-data-structure scenario clearly
-demonstrates the need for RCU's stringent memory-ordering guarantees on
-systems with more than one CPU:
-
-<ol>
-<li> Each CPU that has an RCU read-side critical section that
- begins before <tt>synchronize_rcu()</tt> starts is
- guaranteed to execute a full memory barrier between the time
- that the RCU read-side critical section ends and the time that
- <tt>synchronize_rcu()</tt> returns.
- Without this guarantee, a pre-existing RCU read-side critical section
- might hold a reference to the newly removed <tt>struct foo</tt>
- after the <tt>kfree()</tt> on line&nbsp;14 of
- <tt>remove_gp_synchronous()</tt>.
-<li> Each CPU that has an RCU read-side critical section that ends
- after <tt>synchronize_rcu()</tt> returns is guaranteed
- to execute a full memory barrier between the time that
- <tt>synchronize_rcu()</tt> begins and the time that the RCU
- read-side critical section begins.
- Without this guarantee, a later RCU read-side critical section
- running after the <tt>kfree()</tt> on line&nbsp;14 of
- <tt>remove_gp_synchronous()</tt> might
- later run <tt>do_something_gp()</tt> and find the
- newly deleted <tt>struct foo</tt>.
-<li> If the task invoking <tt>synchronize_rcu()</tt> remains
- on a given CPU, then that CPU is guaranteed to execute a full
- memory barrier sometime during the execution of
- <tt>synchronize_rcu()</tt>.
- This guarantee ensures that the <tt>kfree()</tt> on
- line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
- execute after the removal on line&nbsp;11.
-<li> If the task invoking <tt>synchronize_rcu()</tt> migrates
- among a group of CPUs during that invocation, then each of the
- CPUs in that group is guaranteed to execute a full memory barrier
- sometime during the execution of <tt>synchronize_rcu()</tt>.
- This guarantee also ensures that the <tt>kfree()</tt> on
- line&nbsp;14 of <tt>remove_gp_synchronous()</tt> really does
- execute after the removal on
- line&nbsp;11, but also in the case where the thread executing the
- <tt>synchronize_rcu()</tt> migrates in the meantime.
-</ol>
-
-<p>@@QQ@@
-Given that multiple CPUs can start RCU read-side critical sections
-at any time without any ordering whatsoever, how can RCU possibly tell whether
-or not a given RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>?
-<p>@@QQA@@
-If RCU cannot tell whether or not a given
-RCU read-side critical section starts before a
-given instance of <tt>synchronize_rcu()</tt>,
-then it must assume that the RCU read-side critical section
-started first.
-In other words, a given instance of <tt>synchronize_rcu()</tt>
-can avoid waiting on a given RCU read-side critical section only
-if it can prove that <tt>synchronize_rcu()</tt> started first.
-<p>@@QQE@@
-
-<p>@@QQ@@
-The first and second guarantees require unbelievably strict ordering!
-Are all these memory barriers <i>really</i> required?
-<p>@@QQA@@
-Yes, they really are required.
-To see why the first guarantee is required, consider the following
-sequence of events:
-
-<ol>
-<li> CPU 1: <tt>rcu_read_lock()</tt>
-<li> CPU 1: <tt>q = rcu_dereference(gp);
- /* Very likely to return p. */</tt>
-<li> CPU 0: <tt>list_del_rcu(p);</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li> CPU 1: <tt>do_something_with(q-&gt;a);
- /* No smp_mb(), so might happen after kfree(). */</tt>
-<li> CPU 1: <tt>rcu_read_unlock()</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li> CPU 0: <tt>kfree(p);</tt>
-</ol>
-
-<p>
-Therefore, there absolutely must be a full memory barrier between the
-end of the RCU read-side critical section and the end of the
-grace period.
-
-<p>
-The sequence of events demonstrating the necessity of the second guarantee
-is roughly similar:
-
-<ol>
-<li> CPU 0: <tt>list_del_rcu(p);</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> starts.
-<li> CPU 1: <tt>rcu_read_lock()</tt>
-<li> CPU 1: <tt>q = rcu_dereference(gp);
- /* Might return p if no memory barrier. */</tt>
-<li> CPU 0: <tt>synchronize_rcu()</tt> returns.
-<li> CPU 0: <tt>kfree(p);</tt>
-<li> CPU 1: <tt>do_something_with(q-&gt;a); /* Boom!!! */</tt>
-<li> CPU 1: <tt>rcu_read_unlock()</tt>
-</ol>
-
-<p>
-And similarly, without a memory barrier between the beginning of the
-grace period and the beginning of the RCU read-side critical section,
-CPU&nbsp;1 might end up accessing the freelist.
-
-<p>
-The &ldquo;as if&rdquo; rule of course applies, so that any implementation
-that acts as if the appropriate memory barriers were in place is a
-correct implementation.
-That said, it is much easier to fool yourself into believing that you have
-adhered to the as-if rule than it is to actually adhere to it!
-<p>@@QQE@@
-
-<p>
-Note that these memory-barrier requirements do not replace the fundamental
-RCU requirement that a grace period wait for all pre-existing readers.
-On the contrary, the memory barriers called out in this section must operate in
-such a way as to <i>enforce</i> this fundamental requirement.
-Of course, different implementations enforce this requirement in different
-ways, but enforce it they must.
-
-<h3><a name="RCU Primitives Guaranteed to Execute Unconditionally">RCU Primitives Guaranteed to Execute Unconditionally</a></h3>
-
-<p>
-The common-case RCU primitives are unconditional.
-They are invoked, they do their job, and they return, with no possibility
-of error, and no need to retry.
-This is a key RCU design philosophy.
-
-<p>
-However, this philosophy is pragmatic rather than pigheaded.
-If someone comes up with a good justification for a particular conditional
-RCU primitive, it might well be implemented and added.
-After all, this guarantee was reverse-engineered, not premeditated.
-The unconditional nature of the RCU primitives was initially an
-accident of implementation, and later experience with synchronization
-mechanisms that have conditional primitives caused me to elevate this
-accident to a guarantee.
-Therefore, the justification for adding a conditional primitive to
-RCU would need to be based on detailed and compelling use cases.
-
-<h3><a name="Guaranteed Read-to-Write Upgrade">Guaranteed Read-to-Write Upgrade</a></h3>
-
-<p>
-As far as RCU is concerned, it is always possible to carry out an
-update within an RCU read-side critical section.
-For example, that RCU read-side critical section might search for
-a given data element, and then might acquire the update-side
-spinlock in order to update that element, all while remaining
-in that RCU read-side critical section.
-Of course, it is necessary to exit the RCU read-side critical section
-before invoking <tt>synchronize_rcu()</tt>, however, this
-inconvenience can be avoided through use of the
-<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt> API members
-described later in this document.
-
-<p>@@QQ@@
-But how does the upgrade-to-write operation exclude other readers?
-<p>@@QQA@@
-It doesn't, just like normal RCU updates, which also do not exclude
-RCU readers.
-<p>@@QQE@@
-
-<p>
-This guarantee allows lookup code to be shared between read-side
-and update-side code, and was premeditated, appearing in the earliest
-DYNIX/ptx RCU documentation.
-
-<h2><a name="Fundamental Non-Requirements">Fundamental Non-Requirements</a></h2>
-
-<p>
-RCU provides extremely lightweight readers, and its read-side guarantees,
-though quite useful, are correspondingly lightweight.
-It is therefore all too easy to assume that RCU is guaranteeing more
-than it really is.
-Of course, the list of things that RCU does not guarantee is infinitely
-long, however, the following sections list a few non-guarantees that
-have caused confusion.
-Except where otherwise noted, these non-guarantees were premeditated.
-
-<ol>
-<li> <a href="#Readers Impose Minimal Ordering">
- Readers Impose Minimal Ordering</a>
-<li> <a href="#Readers Do Not Exclude Updaters">
- Readers Do Not Exclude Updaters</a>
-<li> <a href="#Updaters Only Wait For Old Readers">
- Updaters Only Wait For Old Readers</a>
-<li> <a href="#Grace Periods Don't Partition Read-Side Critical Sections">
- Grace Periods Don't Partition Read-Side Critical Sections</a>
-<li> <a href="#Read-Side Critical Sections Don't Partition Grace Periods">
- Read-Side Critical Sections Don't Partition Grace Periods</a>
-<li> <a href="#Disabling Preemption Does Not Block Grace Periods">
- Disabling Preemption Does Not Block Grace Periods</a>
-</ol>
-
-<h3><a name="Readers Impose Minimal Ordering">Readers Impose Minimal Ordering</a></h3>
-
-<p>
-Reader-side markers such as <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> provide absolutely no ordering guarantees
-except through their interaction with the grace-period APIs such as
-<tt>synchronize_rcu()</tt>.
-To see this, consider the following pair of threads:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 WRITE_ONCE(x, 1);
- 5 rcu_read_unlock();
- 6 rcu_read_lock();
- 7 WRITE_ONCE(y, 1);
- 8 rcu_read_unlock();
- 9 }
-10
-11 void thread1(void)
-12 {
-13 rcu_read_lock();
-14 r1 = READ_ONCE(y);
-15 rcu_read_unlock();
-16 rcu_read_lock();
-17 r2 = READ_ONCE(x);
-18 rcu_read_unlock();
-19 }
-</pre>
-</blockquote>
-
-<p>
-After <tt>thread0()</tt> and <tt>thread1()</tt> execute
-concurrently, it is quite possible to have
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 0)
-</pre>
-</blockquote>
-
-(that is, <tt>y</tt> appears to have been assigned before <tt>x</tt>),
-which would not be possible if <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> had much in the way of ordering
-properties.
-But they do not, so the CPU is within its rights
-to do significant reordering.
-This is by design: Any significant ordering constraints would slow down
-these fast-path APIs.
-
-<p>@@QQ@@
-Can't the compiler also reorder this code?
-<p>@@QQA@@
-No, the volatile casts in <tt>READ_ONCE()</tt> and
-<tt>WRITE_ONCE()</tt> prevent the compiler from reordering in
-this particular case.
-<p>@@QQE@@
-
-<h3><a name="Readers Do Not Exclude Updaters">Readers Do Not Exclude Updaters</a></h3>
-
-<p>
-Neither <tt>rcu_read_lock()</tt> nor <tt>rcu_read_unlock()</tt>
-exclude updates.
-All they do is to prevent grace periods from ending.
-The following example illustrates this:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 r1 = READ_ONCE(y);
- 5 if (r1) {
- 6 do_something_with_nonzero_x();
- 7 r2 = READ_ONCE(x);
- 8 WARN_ON(!r2); /* BUG!!! */
- 9 }
-10 rcu_read_unlock();
-11 }
-12
-13 void thread1(void)
-14 {
-15 spin_lock(&amp;my_lock);
-16 WRITE_ONCE(x, 1);
-17 WRITE_ONCE(y, 1);
-18 spin_unlock(&amp;my_lock);
-19 }
-</pre>
-</blockquote>
-
-<p>
-If the <tt>thread0()</tt> function's <tt>rcu_read_lock()</tt>
-excluded the <tt>thread1()</tt> function's update,
-the <tt>WARN_ON()</tt> could never fire.
-But the fact is that <tt>rcu_read_lock()</tt> does not exclude
-much of anything aside from subsequent grace periods, of which
-<tt>thread1()</tt> has none, so the
-<tt>WARN_ON()</tt> can and does fire.
-
-<h3><a name="Updaters Only Wait For Old Readers">Updaters Only Wait For Old Readers</a></h3>
-
-<p>
-It might be tempting to assume that after <tt>synchronize_rcu()</tt>
-completes, there are no readers executing.
-This temptation must be avoided because
-new readers can start immediately after <tt>synchronize_rcu()</tt>
-starts, and <tt>synchronize_rcu()</tt> is under no
-obligation to wait for these new readers.
-
-<p>@@QQ@@
-Suppose that <tt>synchronize_rcu()</tt> did wait until all readers had completed.
-Would the updater be able to rely on this?
-<p>@@QQA@@
-No.
-Even if <tt>synchronize_rcu()</tt> were to wait until
-all readers had completed, a new reader might start immediately after
-<tt>synchronize_rcu()</tt> completed.
-Therefore, the code following
-<tt>synchronize_rcu()</tt> cannot rely on there being no readers
-in any case.
-<p>@@QQE@@
-
-<h3><a name="Grace Periods Don't Partition Read-Side Critical Sections">
-Grace Periods Don't Partition Read-Side Critical Sections</a></h3>
-
-<p>
-It is tempting to assume that if any part of one RCU read-side critical
-section precedes a given grace period, and if any part of another RCU
-read-side critical section follows that same grace period, then all of
-the first RCU read-side critical section must precede all of the second.
-However, this just isn't the case: A single grace period does not
-partition the set of RCU read-side critical sections.
-An example of this situation can be illustrated as follows, where
-<tt>a</tt>, <tt>b</tt>, and <tt>c</tt> are initially all zero:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 WRITE_ONCE(a, 1);
- 5 WRITE_ONCE(b, 1);
- 6 rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11 r1 = READ_ONCE(a);
-12 synchronize_rcu();
-13 WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18 rcu_read_lock();
-19 r2 = READ_ONCE(b);
-20 r3 = READ_ONCE(c);
-21 rcu_read_unlock();
-22 }
-</pre>
-</blockquote>
-
-<p>
-It turns out that the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 0 &amp;&amp; r3 == 1)
-</pre>
-</blockquote>
-
-is entirely possible.
-The following figure shows how this can happen, with each circled
-<tt>QS</tt> indicating the point at which RCU recorded a
-<i>quiescent state</i> for each thread, that is, a state in which
-RCU knows that the thread cannot be in the midst of an RCU read-side
-critical section that started before the current grace period:
-
-<p><img src="GPpartitionReaders1.svg" alt="GPpartitionReaders1.svg" width="60%"></p>
-
-<p>
-If it is necessary to partition RCU read-side critical sections in this
-manner, it is necessary to use two grace periods, where the first
-grace period is known to end before the second grace period starts:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 WRITE_ONCE(a, 1);
- 5 WRITE_ONCE(b, 1);
- 6 rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11 r1 = READ_ONCE(a);
-12 synchronize_rcu();
-13 WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18 r2 = READ_ONCE(c);
-19 synchronize_rcu();
-20 WRITE_ONCE(d, 1);
-21 }
-22
-23 void thread3(void)
-24 {
-25 rcu_read_lock();
-26 r3 = READ_ONCE(b);
-27 r4 = READ_ONCE(d);
-28 rcu_read_unlock();
-29 }
-</pre>
-</blockquote>
-
-<p>
-Here, if <tt>(r1 == 1)</tt>, then
-<tt>thread0()</tt>'s write to <tt>b</tt> must happen
-before the end of <tt>thread1()</tt>'s grace period.
-If in addition <tt>(r4 == 1)</tt>, then
-<tt>thread3()</tt>'s read from <tt>b</tt> must happen
-after the beginning of <tt>thread2()</tt>'s grace period.
-If it is also the case that <tt>(r2 == 1)</tt>, then the
-end of <tt>thread1()</tt>'s grace period must precede the
-beginning of <tt>thread2()</tt>'s grace period.
-This means that the two RCU read-side critical sections cannot overlap,
-guaranteeing that <tt>(r3 == 1)</tt>.
-As a result, the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 0 &amp;&amp; r4 == 1)
-</pre>
-</blockquote>
-
-cannot happen.
-
-<p>
-This non-requirement was also non-premeditated, but became apparent
-when studying RCU's interaction with memory ordering.
-
-<h3><a name="Read-Side Critical Sections Don't Partition Grace Periods">
-Read-Side Critical Sections Don't Partition Grace Periods</a></h3>
-
-<p>
-It is also tempting to assume that if an RCU read-side critical section
-happens between a pair of grace periods, then those grace periods cannot
-overlap.
-However, this temptation leads nowhere good, as can be illustrated by
-the following, with all variables initially zero:
-
-<blockquote>
-<pre>
- 1 void thread0(void)
- 2 {
- 3 rcu_read_lock();
- 4 WRITE_ONCE(a, 1);
- 5 WRITE_ONCE(b, 1);
- 6 rcu_read_unlock();
- 7 }
- 8
- 9 void thread1(void)
-10 {
-11 r1 = READ_ONCE(a);
-12 synchronize_rcu();
-13 WRITE_ONCE(c, 1);
-14 }
-15
-16 void thread2(void)
-17 {
-18 rcu_read_lock();
-19 WRITE_ONCE(d, 1);
-20 r2 = READ_ONCE(c);
-21 rcu_read_unlock();
-22 }
-23
-24 void thread3(void)
-25 {
-26 r3 = READ_ONCE(d);
-27 synchronize_rcu();
-28 WRITE_ONCE(e, 1);
-29 }
-30
-31 void thread4(void)
-32 {
-33 rcu_read_lock();
-34 r4 = READ_ONCE(b);
-35 r5 = READ_ONCE(e);
-36 rcu_read_unlock();
-37 }
-</pre>
-</blockquote>
-
-<p>
-In this case, the outcome:
-
-<blockquote>
-<pre>
-(r1 == 1 &amp;&amp; r2 == 1 &amp;&amp; r3 == 1 &amp;&amp; r4 == 0 &amp;&amp; r5 == 1)
-</pre>
-</blockquote>
-
-is entirely possible, as illustrated below:
-
-<p><img src="ReadersPartitionGP1.svg" alt="ReadersPartitionGP1.svg" width="100%"></p>
-
-<p>
-Again, an RCU read-side critical section can overlap almost all of a
-given grace period, just so long as it does not overlap the entire
-grace period.
-As a result, an RCU read-side critical section cannot partition a pair
-of RCU grace periods.
-
-<p>@@QQ@@
-How long a sequence of grace periods, each separated by an RCU read-side
-critical section, would be required to partition the RCU read-side
-critical sections at the beginning and end of the chain?
-<p>@@QQA@@
-In theory, an infinite number.
-In practice, an unknown number that is sensitive to both implementation
-details and timing considerations.
-Therefore, even in practice, RCU users must abide by the theoretical rather
-than the practical answer.
-<p>@@QQE@@
-
-<h3><a name="Disabling Preemption Does Not Block Grace Periods">
-Disabling Preemption Does Not Block Grace Periods</a></h3>
-
-<p>
-There was a time when disabling preemption on any given CPU would block
-subsequent grace periods.
-However, this was an accident of implementation and is not a requirement.
-And in the current Linux-kernel implementation, disabling preemption
-on a given CPU in fact does not block grace periods, as Oleg Nesterov
-<a href="https://lkml.kernel.org/g/20150614193825.GA19582@redhat.com">demonstrated</a>.
-
-<p>
-If you need a preempt-disable region to block grace periods, you need to add
-<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>, for example
-as follows:
-
-<blockquote>
-<pre>
- 1 preempt_disable();
- 2 rcu_read_lock();
- 3 do_something();
- 4 rcu_read_unlock();
- 5 preempt_enable();
- 6
- 7 /* Spinlocks implicitly disable preemption. */
- 8 spin_lock(&amp;mylock);
- 9 rcu_read_lock();
-10 do_something();
-11 rcu_read_unlock();
-12 spin_unlock(&amp;mylock);
-</pre>
-</blockquote>
-
-<p>
-In theory, you could enter the RCU read-side critical section first,
-but it is more efficient to keep the entire RCU read-side critical
-section contained in the preempt-disable region as shown above.
-Of course, RCU read-side critical sections that extend outside of
-preempt-disable regions will work correctly, but such critical sections
-can be preempted, which forces <tt>rcu_read_unlock()</tt> to do
-more work.
-And no, this is <i>not</i> an invitation to enclose all of your RCU
-read-side critical sections within preempt-disable regions, because
-doing so would degrade real-time response.
-
-<p>
-This non-requirement appeared with preemptible RCU.
-If you need a grace period that waits on non-preemptible code regions, use
-<a href="#Sched Flavor">RCU-sched</a>.
-
-<h2><a name="Parallelism Facts of Life">Parallelism Facts of Life</a></h2>
-
-<p>
-These parallelism facts of life are by no means specific to RCU, but
-the RCU implementation must abide by them.
-They therefore bear repeating:
-
-<ol>
-<li> Any CPU or task may be delayed at any time,
- and any attempts to avoid these delays by disabling
- preemption, interrupts, or whatever are completely futile.
- This is most obvious in preemptible user-level
- environments and in virtualized environments (where
- a given guest OS's VCPUs can be preempted at any time by
- the underlying hypervisor), but can also happen in bare-metal
- environments due to ECC errors, NMIs, and other hardware
- events.
- Although a delay of more than about 20 seconds can result
- in splats, the RCU implementation is obligated to use
- algorithms that can tolerate extremely long delays, but where
- &ldquo;extremely long&rdquo; is not long enough to allow
- wrap-around when incrementing a 64-bit counter.
-<li> Both the compiler and the CPU can reorder memory accesses.
- Where it matters, RCU must use compiler directives and
- memory-barrier instructions to preserve ordering.
-<li> Conflicting writes to memory locations in any given cache line
- will result in expensive cache misses.
- Greater numbers of concurrent writes and more-frequent
- concurrent writes will result in more dramatic slowdowns.
- RCU is therefore obligated to use algorithms that have
- sufficient locality to avoid significant performance and
- scalability problems.
-<li> As a rough rule of thumb, only one CPU's worth of processing
- may be carried out under the protection of any given exclusive
- lock.
- RCU must therefore use scalable locking designs.
-<li> Counters are finite, especially on 32-bit systems.
- RCU's use of counters must therefore tolerate counter wrap,
- or be designed such that counter wrap would take way more
- time than a single system is likely to run.
- An uptime of ten years is quite possible, a runtime
- of a century much less so.
- As an example of the latter, RCU's dyntick-idle nesting counter
- allows 54 bits for interrupt nesting level (this counter
- is 64 bits even on a 32-bit system).
- Overflowing this counter requires 2<sup>54</sup>
- half-interrupts on a given CPU without that CPU ever going idle.
- If a half-interrupt happened every microsecond, it would take
- 570 years of runtime to overflow this counter, which is currently
- believed to be an acceptably long time.
-<li> Linux systems can have thousands of CPUs running a single
- Linux kernel in a single shared-memory environment.
- RCU must therefore pay close attention to high-end scalability.
-</ol>
-
-<p>
-This last parallelism fact of life means that RCU must pay special
-attention to the preceding facts of life.
-The idea that Linux might scale to systems with thousands of CPUs would
-have been met with some skepticism in the 1990s, but these requirements
-would otherwise have been unsurprising, even in the early 1990s.
-
-<h2><a name="Quality-of-Implementation Requirements">Quality-of-Implementation Requirements</a></h2>
-
-<p>
-These sections list quality-of-implementation requirements.
-Although an RCU implementation that ignores these requirements could
-still be used, it would likely be subject to limitations that would
-make it inappropriate for industrial-strength production use.
-Classes of quality-of-implementation requirements are as follows:
-
-<ol>
-<li> <a href="#Specialization">Specialization</a>
-<li> <a href="#Performance and Scalability">Performance and Scalability</a>
-<li> <a href="#Composability">Composability</a>
-<li> <a href="#Corner Cases">Corner Cases</a>
-</ol>
-
-<p>
-These classes are covered in the following sections.
-
-<h3><a name="Specialization">Specialization</a></h3>
-
-<p>
-RCU is and always has been intended primarily for read-mostly situations, as
-illustrated by the following figure.
-This means that RCU's read-side primitives are optimized, often at the
-expense of its update-side primitives.
-
-<p><img src="RCUApplicability.svg" alt="RCUApplicability.svg" width="70%"></p>
-
-<p>
-This focus on read-mostly situations means that RCU must interoperate
-with other synchronization primitives.
-For example, the <tt>add_gp()</tt> and <tt>remove_gp_synchronous()</tt>
-examples discussed earlier use RCU to protect readers and locking to
-coordinate updaters.
-However, the need extends much farther, requiring that a variety of
-synchronization primitives be legal within RCU read-side critical sections,
-including spinlocks, sequence locks, atomic operations, reference
-counters, and memory barriers.
-
-<p>@@QQ@@
-What about sleeping locks?
-<p>@@QQA@@
-These are forbidden within Linux-kernel RCU read-side critical sections
-because it is not legal to place a quiescent state (in this case,
-voluntary context switch) within an RCU read-side critical section.
-However, sleeping locks may be used within userspace RCU read-side critical
-sections, and also within Linux-kernel sleepable RCU
-<a href="#Sleepable RCU">(SRCU)</a>
-read-side critical sections.
-In addition, the -rt patchset turns spinlocks into sleeping locks so
-that the corresponding critical sections can be preempted, which
-also means that these sleeplockified spinlocks (but not other sleeping locks!)
-may be acquired within -rt-Linux-kernel RCU read-side critical sections.
-
-<p>
-Note that it <i>is</i> legal for a normal RCU read-side critical section
-to conditionally acquire a sleeping lock (as in <tt>mutex_trylock()</tt>),
-but only as long as it does not loop indefinitely attempting to
-conditionally acquire that sleeping lock.
-The key point is that things like <tt>mutex_trylock()</tt>
-either return with the mutex held, or return an error indication if
-the mutex was not immediately available.
-Either way, <tt>mutex_trylock()</tt> returns immediately without sleeping.
-<p>@@QQE@@
-
-<p>
-It often comes as a surprise that many algorithms do not require a
-consistent view of data, but many can function in that mode,
-with network routing being the poster child.
-Internet routing algorithms take significant time to propagate
-updates, so that by the time an update arrives at a given system,
-that system has been sending network traffic the wrong way for
-a considerable length of time.
-Having a few threads continue to send traffic the wrong way for a
-few more milliseconds is clearly not a problem: In the worst case,
-TCP retransmissions will eventually get the data where it needs to go.
-In general, when tracking the state of the universe outside of the
-computer, some level of inconsistency must be tolerated due to
-speed-of-light delays if nothing else.
-
-<p>
-Furthermore, uncertainty about external state is inherent in many cases.
-For example, a pair of veterinarians might use heartbeat to determine
-whether or not a given cat was alive.
-But how long should they wait after the last heartbeat to decide that
-the cat is in fact dead?
-Waiting less than 400 milliseconds makes no sense because this would
-mean that a relaxed cat would be considered to cycle between death
-and life more than 100 times per minute.
-Moreover, just as with human beings, a cat's heart might stop for
-some period of time, so the exact wait period is a judgment call.
-One of our pair of veterinarians might wait 30 seconds before pronouncing
-the cat dead, while the other might insist on waiting a full minute.
-The two veterinarians would then disagree on the state of the cat during
-the final 30 seconds of the minute following the last heartbeat, as
-fancifully illustrated below:
-
-<p><img src="2013-08-is-it-dead.png" alt="2013-08-is-it-dead.png" width="431"></p>
-
-<p>
-Interestingly enough, this same situation applies to hardware.
-When push comes to shove, how do we tell whether or not some
-external server has failed?
-We send messages to it periodically, and declare it failed if we
-don't receive a response within a given period of time.
-Policy decisions can usually tolerate short
-periods of inconsistency.
-The policy was decided some time ago, and is only now being put into
-effect, so a few milliseconds of delay is normally inconsequential.
-
-<p>
-However, there are algorithms that absolutely must see consistent data.
-For example, the translation from a user-level SystemV semaphore
-ID to the corresponding in-kernel data structure is protected by RCU,
-but it is absolutely forbidden to update a semaphore that has just been
-removed.
-In the Linux kernel, this need for consistency is accommodated by acquiring
-spinlocks located in the in-kernel data structure from within
-the RCU read-side critical section, and this is indicated by the
-green box in the figure above.
-Many other techniques may be used, and are in fact used within the
-Linux kernel.
-
-<p>
-In short, RCU is not required to maintain consistency, and other
-mechanisms may be used in concert with RCU when consistency is required.
-RCU's specialization allows it to do its job extremely well, and its
-ability to interoperate with other synchronization mechanisms allows
-the right mix of synchronization tools to be used for a given job.
-
-<h3><a name="Performance and Scalability">Performance and Scalability</a></h3>
-
-<p>
-Energy efficiency is a critical component of performance today,
-and Linux-kernel RCU implementations must therefore avoid unnecessarily
-awakening idle CPUs.
-I cannot claim that this requirement was premeditated.
-In fact, I learned of it during a telephone conversation in which I
-was given &ldquo;frank and open&rdquo; feedback on the importance
-of energy efficiency in battery-powered systems and on specific
-energy-efficiency shortcomings of the Linux-kernel RCU implementation.
-In my experience, the battery-powered embedded community will consider
-any unnecessary wakeups to be extremely unfriendly acts.
-So much so that mere Linux-kernel-mailing-list posts are
-insufficient to vent their ire.
-
-<p>
-Memory consumption is not particularly important in most
-situations, and has become decreasingly
-so as memory sizes have expanded and memory
-costs have plummeted.
-However, as I learned from Matt Mackall's
-<a href="http://elinux.org/Linux_Tiny-FAQ">bloatwatch</a>
-efforts, memory footprint is critically important on single-CPU systems with
-non-preemptible (<tt>CONFIG_PREEMPT=n</tt>) kernels, and thus
-<a href="https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com">tiny RCU</a>
-was born.
-Josh Triplett has since taken over the small-memory banner with his
-<a href="https://tiny.wiki.kernel.org/">Linux kernel tinification</a>
-project, which resulted in
-<a href="#Sleepable RCU">SRCU</a>
-becoming optional for those kernels not needing it.
-
-<p>
-The remaining performance requirements are, for the most part,
-unsurprising.
-For example, in keeping with RCU's read-side specialization,
-<tt>rcu_dereference()</tt> should have negligible overhead (for
-example, suppression of a few minor compiler optimizations).
-Similarly, in non-preemptible environments, <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> should have exactly zero overhead.
-
-<p>
-In preemptible environments, in the case where the RCU read-side
-critical section was not preempted (as will be the case for the
-highest-priority real-time process), <tt>rcu_read_lock()</tt> and
-<tt>rcu_read_unlock()</tt> should have minimal overhead.
-In particular, they should not contain atomic read-modify-write
-operations, memory-barrier instructions, preemption disabling,
-interrupt disabling, or backwards branches.
-However, in the case where the RCU read-side critical section was preempted,
-<tt>rcu_read_unlock()</tt> may acquire spinlocks and disable interrupts.
-This is why it is better to nest an RCU read-side critical section
-within a preempt-disable region than vice versa, at least in cases
-where that critical section is short enough to avoid unduly degrading
-real-time latencies.
-
-<p>
-The <tt>synchronize_rcu()</tt> grace-period-wait primitive is
-optimized for throughput.
-It may therefore incur several milliseconds of latency in addition to
-the duration of the longest RCU read-side critical section.
-On the other hand, multiple concurrent invocations of
-<tt>synchronize_rcu()</tt> are required to use batching optimizations
-so that they can be satisfied by a single underlying grace-period-wait
-operation.
-For example, in the Linux kernel, it is not unusual for a single
-grace-period-wait operation to serve more than
-<a href="https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response">1,000 separate invocations</a>
-of <tt>synchronize_rcu()</tt>, thus amortizing the per-invocation
-overhead down to nearly zero.
-However, the grace-period optimization is also required to avoid
-measurable degradation of real-time scheduling and interrupt latencies.
-
-<p>
-In some cases, the multi-millisecond <tt>synchronize_rcu()</tt>
-latencies are unacceptable.
-In these cases, <tt>synchronize_rcu_expedited()</tt> may be used
-instead, reducing the grace-period latency down to a few tens of
-microseconds on small systems, at least in cases where the RCU read-side
-critical sections are short.
-There are currently no special latency requirements for
-<tt>synchronize_rcu_expedited()</tt> on large systems, but,
-consistent with the empirical nature of the RCU specification,
-that is subject to change.
-However, there most definitely are scalability requirements:
-A storm of <tt>synchronize_rcu_expedited()</tt> invocations on 4096
-CPUs should at least make reasonable forward progress.
-In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt>
-is permitted to impose modest degradation of real-time latency
-on non-idle online CPUs.
-That said, it will likely be necessary to take further steps to reduce this
-degradation, hopefully to roughly that of a scheduling-clock interrupt.
-
-<p>
-There are a number of situations where even
-<tt>synchronize_rcu_expedited()</tt>'s reduced grace-period
-latency is unacceptable.
-In these situations, the asynchronous <tt>call_rcu()</tt> can be
-used in place of <tt>synchronize_rcu()</tt> as follows:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2 int a;
- 3 int b;
- 4 struct rcu_head rh;
- 5 };
- 6
- 7 static void remove_gp_cb(struct rcu_head *rhp)
- 8 {
- 9 struct foo *p = container_of(rhp, struct foo, rh);
-10
-11 kfree(p);
-12 }
-13
-14 bool remove_gp_asynchronous(void)
-15 {
-16 struct foo *p;
-17
-18 spin_lock(&amp;gp_lock);
-19 p = rcu_access_pointer(gp);
-20 if (!p) {
-21 spin_unlock(&amp;gp_lock);
-22 return false;
-23 }
-24 rcu_assign_pointer(gp, NULL);
-25 call_rcu(&amp;p-&gt;rh, remove_gp_cb);
-26 spin_unlock(&amp;gp_lock);
-27 return true;
-28 }
-</pre>
-</blockquote>
-
-<p>
-A definition of <tt>struct foo</tt> is finally needed, and appears
-on lines&nbsp;1-5.
-The function <tt>remove_gp_cb()</tt> is passed to <tt>call_rcu()</tt>
-on line&nbsp;25, and will be invoked after the end of a subsequent
-grace period.
-This gets the same effect as <tt>remove_gp_synchronous()</tt>,
-but without forcing the updater to wait for a grace period to elapse.
-The <tt>call_rcu()</tt> function may be used in a number of
-situations where neither <tt>synchronize_rcu()</tt> nor
-<tt>synchronize_rcu_expedited()</tt> would be legal,
-including within preempt-disable code, <tt>local_bh_disable()</tt> code,
-interrupt-disable code, and interrupt handlers.
-However, even <tt>call_rcu()</tt> is illegal within NMI handlers.
-The callback function (<tt>remove_gp_cb()</tt> in this case) will be
-executed within softirq (software interrupt) environment within the
-Linux kernel,
-either within a real softirq handler or under the protection
-of <tt>local_bh_disable()</tt>.
-In both the Linux kernel and in userspace, it is bad practice to
-write an RCU callback function that takes too long.
-Long-running operations should be relegated to separate threads or
-(in the Linux kernel) workqueues.
-
-<p>@@QQ@@
-Why does line&nbsp;19 use <tt>rcu_access_pointer()</tt>?
-After all, <tt>call_rcu()</tt> on line&nbsp;25 stores into the
-structure, which would interact badly with concurrent insertions.
-Doesn't this mean that <tt>rcu_dereference()</tt> is required?
-<p>@@QQA@@
-Presumably the <tt>-&gt;gp_lock</tt> acquired on line&nbsp;18 excludes
-any changes, including any insertions that <tt>rcu_dereference()</tt>
-would protect against.
-Therefore, any insertions will be delayed until after <tt>-&gt;gp_lock</tt>
-is released on line&nbsp;26, which in turn means that
-<tt>rcu_access_pointer()</tt> suffices.
-<p>@@QQE@@
-
-<p>
-However, all that <tt>remove_gp_cb()</tt> is doing is
-invoking <tt>kfree()</tt> on the data element.
-This is a common idiom, and is supported by <tt>kfree_rcu()</tt>,
-which allows &ldquo;fire and forget&rdquo; operation as shown below:
-
-<blockquote>
-<pre>
- 1 struct foo {
- 2 int a;
- 3 int b;
- 4 struct rcu_head rh;
- 5 };
- 6
- 7 bool remove_gp_faf(void)
- 8 {
- 9 struct foo *p;
-10
-11 spin_lock(&amp;gp_lock);
-12 p = rcu_dereference(gp);
-13 if (!p) {
-14 spin_unlock(&amp;gp_lock);
-15 return false;
-16 }
-17 rcu_assign_pointer(gp, NULL);
-18 kfree_rcu(p, rh);
-19 spin_unlock(&amp;gp_lock);
-20 return true;
-21 }
-</pre>
-</blockquote>
-
-<p>
-Note that <tt>remove_gp_faf()</tt> simply invokes
-<tt>kfree_rcu()</tt> and proceeds, without any need to pay any
-further attention to the subsequent grace period and <tt>kfree()</tt>.
-It is permissible to invoke <tt>kfree_rcu()</tt> from the same
-environments as for <tt>call_rcu()</tt>.
-Interestingly enough, DYNIX/ptx had the equivalents of
-<tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>, but not
-<tt>synchronize_rcu()</tt>.
-This was due to the fact that RCU was not heavily used within DYNIX/ptx,
-so the very few places that needed something like
-<tt>synchronize_rcu()</tt> simply open-coded it.
-
-<p>@@QQ@@
-Earlier it was claimed that <tt>call_rcu()</tt> and
-<tt>kfree_rcu()</tt> allowed updaters to avoid being blocked
-by readers.
-But how can that be correct, given that the invocation of the callback
-and the freeing of the memory (respectively) must still wait for
-a grace period to elapse?
-<p>@@QQA@@
-We could define things this way, but keep in mind that this sort of
-definition would say that updates in garbage-collected languages
-cannot complete until the next time the garbage collector runs,
-which does not seem at all reasonable.
-The key point is that in most cases, an updater using either
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> can proceed to the
-next update as soon as it has invoked <tt>call_rcu()</tt> or
-<tt>kfree_rcu()</tt>, without having to wait for a subsequent
-grace period.
-<p>@@QQE@@
-
-<p>
-But what if the updater must wait for the completion of code to be
-executed after the end of the grace period, but has other tasks
-that can be carried out in the meantime?
-The polling-style <tt>get_state_synchronize_rcu()</tt> and
-<tt>cond_synchronize_rcu()</tt> functions may be used for this
-purpose, as shown below:
-
-<blockquote>
-<pre>
- 1 bool remove_gp_poll(void)
- 2 {
- 3 struct foo *p;
- 4 unsigned long s;
- 5
- 6 spin_lock(&amp;gp_lock);
- 7 p = rcu_access_pointer(gp);
- 8 if (!p) {
- 9 spin_unlock(&amp;gp_lock);
-10 return false;
-11 }
-12 rcu_assign_pointer(gp, NULL);
-13 spin_unlock(&amp;gp_lock);
-14 s = get_state_synchronize_rcu();
-15 do_something_while_waiting();
-16 cond_synchronize_rcu(s);
-17 kfree(p);
-18 return true;
-19 }
-</pre>
-</blockquote>
-
-<p>
-On line&nbsp;14, <tt>get_state_synchronize_rcu()</tt> obtains a
-&ldquo;cookie&rdquo; from RCU,
-then line&nbsp;15 carries out other tasks,
-and finally, line&nbsp;16 returns immediately if a grace period has
-elapsed in the meantime, but otherwise waits as required.
-The need for <tt>get_state_synchronize_rcu()</tt> and
-<tt>cond_synchronize_rcu()</tt> has appeared quite recently,
-so it is too early to tell whether they will stand the test of time.
-
-<p>
-RCU thus provides a range of tools to allow updaters to strike the
-required tradeoff between latency, flexibility and CPU overhead.
-
-<h3><a name="Composability">Composability</a></h3>
-
-<p>
-Composability has received much attention in recent years, perhaps in part
-due to the collision of multicore hardware with object-oriented techniques
-designed in single-threaded environments for single-threaded use.
-And in theory, RCU read-side critical sections may be composed, and in
-fact may be nested arbitrarily deeply.
-In practice, as with all real-world implementations of composable
-constructs, there are limitations.
-
-<p>
-Implementations of RCU for which <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> generate no code, such as
-Linux-kernel RCU when <tt>CONFIG_PREEMPT=n</tt>, can be
-nested arbitrarily deeply.
-After all, there is no overhead.
-Except that if all these instances of <tt>rcu_read_lock()</tt>
-and <tt>rcu_read_unlock()</tt> are visible to the compiler,
-compilation will eventually fail due to exhausting memory,
-mass storage, or user patience, whichever comes first.
-If the nesting is not visible to the compiler, as is the case with
-mutually recursive functions each in its own translation unit,
-stack overflow will result.
-If the nesting takes the form of loops, either the control variable
-will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
-Nevertheless, this class of RCU implementations is one
-of the most composable constructs in existence.
-
-<p>
-RCU implementations that explicitly track nesting depth
-are limited by the nesting-depth counter.
-For example, the Linux kernel's preemptible RCU limits nesting to
-<tt>INT_MAX</tt>.
-This should suffice for almost all practical purposes.
-That said, a consecutive pair of RCU read-side critical sections
-between which there is an operation that waits for a grace period
-cannot be enclosed in another RCU read-side critical section.
-This is because it is not legal to wait for a grace period within
-an RCU read-side critical section: To do so would result either
-in deadlock or
-in RCU implicitly splitting the enclosing RCU read-side critical
-section, neither of which is conducive to a long-lived and prosperous
-kernel.
-
-<p>
-It is worth noting that RCU is not alone in limiting composability.
-For example, many transactional-memory implementations prohibit
-composing a pair of transactions separated by an irrevocable
-operation (for example, a network receive operation).
-For another example, lock-based critical sections can be composed
-surprisingly freely, but only if deadlock is avoided.
-
-<p>
-In short, although RCU read-side critical sections are highly composable,
-care is required in some situations, just as is the case for any other
-composable synchronization mechanism.
-
-<h3><a name="Corner Cases">Corner Cases</a></h3>
-
-<p>
-A given RCU workload might have an endless and intense stream of
-RCU read-side critical sections, perhaps even so intense that there
-was never a point in time during which there was not at least one
-RCU read-side critical section in flight.
-RCU cannot allow this situation to block grace periods: As long as
-all the RCU read-side critical sections are finite, grace periods
-must also be finite.
-
-<p>
-That said, preemptible RCU implementations could potentially result
-in RCU read-side critical sections being preempted for long durations,
-which has the effect of creating a long-duration RCU read-side
-critical section.
-This situation can arise only in heavily loaded systems, but systems using
-real-time priorities are of course more vulnerable.
-Therefore, RCU priority boosting is provided to help deal with this
-case.
-That said, the exact requirements on RCU priority boosting will likely
-evolve as more experience accumulates.
-
-<p>
-Other workloads might have very high update rates.
-Although one can argue that such workloads should instead use
-something other than RCU, the fact remains that RCU must
-handle such workloads gracefully.
-This requirement is another factor driving batching of grace periods,
-but it is also the driving force behind the checks for large numbers
-of queued RCU callbacks in the <tt>call_rcu()</tt> code path.
-Finally, high update rates should not delay RCU read-side critical
-sections, although some read-side delays can occur when using
-<tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use
-of <tt>try_stop_cpus()</tt>.
-(In the future, <tt>synchronize_rcu_expedited()</tt> will be
-converted to use lighter-weight inter-processor interrupts (IPIs),
-but this will still disturb readers, though to a much smaller degree.)
-
-<p>
-Although all three of these corner cases were understood in the early
-1990s, a simple user-level test consisting of <tt>close(open(path))</tt>
-in a tight loop
-in the early 2000s suddenly provided a much deeper appreciation of the
-high-update-rate corner case.
-This test also motivated addition of some RCU code to react to high update
-rates.  For example, if a given CPU finds itself with more than 10,000
-RCU callbacks queued, it will cause RCU to take evasive action by
-more aggressively starting grace periods and more aggressively forcing
-completion of grace-period processing.
-This evasive action causes the grace period to complete more quickly,
-but at the cost of restricting RCU's batching optimizations, thus
-increasing the CPU overhead incurred by that grace period.
-
-<h2><a name="Software-Engineering Requirements">
-Software-Engineering Requirements</a></h2>
-
-<p>
-Between Murphy's Law and &ldquo;To err is human&rdquo;, it is necessary to
-guard against mishaps and misuse:
-
-<ol>
-<li> It is all too easy to forget to use <tt>rcu_read_lock()</tt>
- everywhere that it is needed, so kernels built with
- <tt>CONFIG_PROVE_RCU=y</tt> will splat if
- <tt>rcu_dereference()</tt> is used outside of an
- RCU read-side critical section.
- Update-side code can use <tt>rcu_dereference_protected()</tt>,
- which takes a
- <a href="https://lwn.net/Articles/371986/">lockdep expression</a>
- to indicate what is providing the protection.
- If the indicated protection is not provided, a lockdep splat
- is emitted.
-
- <p>
- Code shared between readers and updaters can use
- <tt>rcu_dereference_check()</tt>, which also takes a
- lockdep expression, and emits a lockdep splat if neither
- <tt>rcu_read_lock()</tt> nor the indicated protection
- is in place.
- In addition, <tt>rcu_dereference_raw()</tt> is used in those
- (hopefully rare) cases where the required protection cannot
- be easily described.
- Finally, <tt>rcu_read_lock_held()</tt> is provided to
- allow a function to verify that it has been invoked within
- an RCU read-side critical section.
- I was made aware of this set of requirements shortly after Thomas
- Gleixner audited a number of RCU uses.
-<li> A given function might wish to check for RCU-related preconditions
- upon entry, before using any other RCU API.
- The <tt>rcu_lockdep_assert()</tt> does this job,
- asserting the expression in kernels having lockdep enabled
- and doing nothing otherwise.
-<li> It is also easy to forget to use <tt>rcu_assign_pointer()</tt>
- and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
- substituting a simple assignment.
- To catch this sort of error, a given RCU-protected pointer may be
- tagged with <tt>__rcu</tt>, after which running sparse
- with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt> will complain
- about simple-assignment accesses to that pointer.
- Arnd Bergmann made me aware of this requirement, and also
- supplied the needed
- <a href="https://lwn.net/Articles/376011/">patch series</a>.
-<li> Kernels built with <tt>CONFIG_DEBUG_OBJECTS_RCU_HEAD=y</tt>
- will splat if a data element is passed to <tt>call_rcu()</tt>
- twice in a row, without a grace period in between.
- (This error is similar to a double free.)
- The corresponding <tt>rcu_head</tt> structures that are
- dynamically allocated are automatically tracked, but
- <tt>rcu_head</tt> structures allocated on the stack
- must be initialized with <tt>init_rcu_head_on_stack()</tt>
- and cleaned up with <tt>destroy_rcu_head_on_stack()</tt>.
- Similarly, statically allocated non-stack <tt>rcu_head</tt>
- structures must be initialized with <tt>init_rcu_head()</tt>
- and cleaned up with <tt>destroy_rcu_head()</tt>.
- Mathieu Desnoyers made me aware of this requirement, and also
- supplied the needed
- <a href="https://lkml.kernel.org/g/20100319013024.GA28456@Krystal">patch</a>.
-<li> An infinite loop in an RCU read-side critical section will
- eventually trigger an RCU CPU stall warning splat, with
- the duration of &ldquo;eventually&rdquo; being controlled by the
- <tt>RCU_CPU_STALL_TIMEOUT</tt> <tt>Kconfig</tt> option, or,
- alternatively, by the
- <tt>rcupdate.rcu_cpu_stall_timeout</tt> boot/sysfs
- parameter.
- However, RCU is not obligated to produce this splat
- unless there is a grace period waiting on that particular
- RCU read-side critical section.
- <p>
- Some extreme workloads might intentionally delay
- RCU grace periods, and systems running those workloads can
- be booted with <tt>rcupdate.rcu_cpu_stall_suppress</tt>
- to suppress the splats.
- This kernel parameter may also be set via <tt>sysfs</tt>.
- Furthermore, RCU CPU stall warnings are counter-productive
- during sysrq dumps and during panics.
- RCU therefore supplies the <tt>rcu_sysrq_start()</tt> and
- <tt>rcu_sysrq_end()</tt> API members to be called before
- and after long sysrq dumps.
- RCU also supplies the <tt>rcu_panic()</tt> notifier that is
- automatically invoked at the beginning of a panic to suppress
- further RCU CPU stall warnings.
-
- <p>
- This requirement made itself known in the early 1990s, pretty
- much the first time that it was necessary to debug a CPU stall.
- That said, the initial implementation in DYNIX/ptx was quite
- generic in comparison with that of Linux.
-<li> Although it would be very good to detect pointers leaking out
- of RCU read-side critical sections, there is currently no
- good way of doing this.
- One complication is the need to distinguish between pointers
- leaking and pointers that have been handed off from RCU to
- some other synchronization mechanism, for example, reference
- counting.
-<li> In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
- information is provided via both debugfs and event tracing.
-<li> Open-coded use of <tt>rcu_assign_pointer()</tt> and
- <tt>rcu_dereference()</tt> to create typical linked
- data structures can be surprisingly error-prone.
- Therefore, RCU-protected
- <a href="https://lwn.net/Articles/609973/#RCU List APIs">linked lists</a>
- and, more recently, RCU-protected
- <a href="https://lwn.net/Articles/612100/">hash tables</a>
- are available.
- Many other special-purpose RCU-protected data structures are
- available in the Linux kernel and the userspace RCU library.
-<li> Some linked structures are created at compile time, but still
- require <tt>__rcu</tt> checking.
- The <tt>RCU_POINTER_INITIALIZER()</tt> macro serves this
- purpose.
-<li> It is not necessary to use <tt>rcu_assign_pointer()</tt>
- when creating linked structures that are to be published via
- a single external pointer.
- The <tt>RCU_INIT_POINTER()</tt> macro is provided for
- this task and also for assigning <tt>NULL</tt> pointers
- at runtime.
-</ol>
-
-<p>
-This is not a hard-and-fast list: RCU's diagnostic capabilities will
-continue to be guided by the number and type of usage bugs found
-in real-world RCU usage.
-
-<h2><a name="Linux Kernel Complications">Linux Kernel Complications</a></h2>
-
-<p>
-The Linux kernel provides an interesting environment for all kinds of
-software, including RCU.
-Some of the relevant points of interest are as follows:
-
-<ol>
-<li> <a href="#Configuration">Configuration</a>.
-<li> <a href="#Firmware Interface">Firmware Interface</a>.
-<li> <a href="#Early Boot">Early Boot</a>.
-<li> <a href="#Interrupts and NMIs">
- Interrupts and non-maskable interrupts (NMIs)</a>.
-<li> <a href="#Loadable Modules">Loadable Modules</a>.
-<li> <a href="#Hotplug CPU">Hotplug CPU</a>.
-<li> <a href="#Scheduler and RCU">Scheduler and RCU</a>.
-<li> <a href="#Tracing and RCU">Tracing and RCU</a>.
-<li> <a href="#Energy Efficiency">Energy Efficiency</a>.
-<li> <a href="#Memory Efficiency">Memory Efficiency</a>.
-<li> <a href="#Performance, Scalability, Response Time, and Reliability">
- Performance, Scalability, Response Time, and Reliability</a>.
-</ol>
-
-<p>
-This list is probably incomplete, but it does give a feel for the
-most notable Linux-kernel complications.
-Each of the following sections covers one of the above topics.
-
-<h3><a name="Configuration">Configuration</a></h3>
-
-<p>
-RCU's goal is automatic configuration, so that almost nobody
-needs to worry about RCU's <tt>Kconfig</tt> options.
-And for almost all users, RCU does in fact work well
-&ldquo;out of the box.&rdquo;
-
-<p>
-However, there are specialized use cases that are handled by
-kernel boot parameters and <tt>Kconfig</tt> options.
-Unfortunately, the <tt>Kconfig</tt> system will explicitly ask users
-about new <tt>Kconfig</tt> options, which requires almost all of them
-be hidden behind a <tt>CONFIG_RCU_EXPERT</tt> <tt>Kconfig</tt> option.
-
-<p>
-This all should be quite obvious, but the fact remains that
-Linus Torvalds recently had to
-<a href="https://lkml.kernel.org/g/CA+55aFy4wcCwaL4okTs8wXhGZ5h-ibecy_Meg9C4MNQrUnwMcg@mail.gmail.com">remind</a>
-me of this requirement.
-
-<h3><a name="Firmware Interface">Firmware Interface</a></h3>
-
-<p>
-In many cases, the kernel obtains information about the system from the
-firmware, and sometimes things are lost in translation.
-Or the translation is accurate, but the original message is bogus.
-
-<p>
-For example, some systems' firmware overreports the number of CPUs,
-sometimes by a large factor.
-If RCU naively believed the firmware, as it used to do,
-it would create too many per-CPU kthreads.
-Although the resulting system will still run correctly, the extra
-kthreads needlessly consume memory and can cause confusion
-when they show up in <tt>ps</tt> listings.
-
-<p>
-RCU must therefore wait for a given CPU to actually come online before
-it can allow itself to believe that the CPU actually exists.
-The resulting &ldquo;ghost CPUs&rdquo; (which are never going to
-come online) cause a number of
-<a href="https://paulmck.livejournal.com/37494.html">interesting complications</a>.
-
-<h3><a name="Early Boot">Early Boot</a></h3>
-
-<p>
-The Linux kernel's boot sequence is an interesting process,
-and RCU is used early, even before <tt>rcu_init()</tt>
-is invoked.
-In fact, a number of RCU's primitives can be used as soon as the
-initial task's <tt>task_struct</tt> is available and the
-boot CPU's per-CPU variables are set up.
-The read-side primitives (<tt>rcu_read_lock()</tt>,
-<tt>rcu_read_unlock()</tt>, <tt>rcu_dereference()</tt>,
-and <tt>rcu_access_pointer()</tt>) will operate normally very early on,
-as will <tt>rcu_assign_pointer()</tt>.
-
-<p>
-Although <tt>call_rcu()</tt> may be invoked at any
-time during boot, callbacks are not guaranteed to be invoked until after
-the scheduler is fully up and running.
-This delay in callback invocation is due to the fact that RCU does not
-invoke callbacks until it is fully initialized, and this full initialization
-cannot occur until after the scheduler has initialized itself to the
-point where RCU can spawn and run its kthreads.
-In theory, it would be possible to invoke callbacks earlier,
-however, this is not a panacea because there would be severe restrictions
-on what operations those callbacks could invoke.
-
-<p>
-Perhaps surprisingly, <tt>synchronize_rcu()</tt>,
-<a href="#Bottom-Half Flavor"><tt>synchronize_rcu_bh()</tt></a>
-(<a href="#Bottom-Half Flavor">discussed below</a>),
-and
-<a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>
-will all operate normally
-during very early boot, the reason being that there is only one CPU
-and preemption is disabled.
-This means that the call to <tt>synchronize_rcu()</tt> (or friends)
-itself is a quiescent
-state and thus a grace period, so the early-boot implementation can
-be a no-op.
-
-<p>
-Both <tt>synchronize_rcu_bh()</tt> and <tt>synchronize_sched()</tt>
-continue to operate normally through the remainder of boot, courtesy
-of the fact that preemption is disabled across their RCU read-side
-critical sections and also courtesy of the fact that there is still
-only one CPU.
-However, once the scheduler starts initializing, preemption is enabled.
-There is still only a single CPU, but the fact that preemption is enabled
-means that the no-op implementation of <tt>synchronize_rcu()</tt> no
-longer works in <tt>CONFIG_PREEMPT=y</tt> kernels.
-Therefore, as soon as the scheduler starts initializing, the early-boot
-fastpath is disabled.
-This means that <tt>synchronize_rcu()</tt> switches to its runtime
-mode of operation where it posts callbacks, which in turn means that
-any call to <tt>synchronize_rcu()</tt> will block until the corresponding
-callback is invoked.
-Unfortunately, the callback cannot be invoked until RCU's runtime
-grace-period machinery is up and running, which cannot happen until
-the scheduler has initialized itself sufficiently to allow RCU's
-kthreads to be spawned.
-Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler
-initialization can result in deadlock.
-
-<p>@@QQ@@
-So what happens with <tt>synchronize_rcu()</tt> during
-scheduler initialization for <tt>CONFIG_PREEMPT=n</tt>
-kernels?
-<p>@@QQA@@
-In <tt>CONFIG_PREEMPT=n</tt> kernels, <tt>synchronize_rcu()</tt>
-maps directly to <tt>synchronize_sched()</tt>.
-Therefore, <tt>synchronize_rcu()</tt> works normally throughout
-boot in <tt>CONFIG_PREEMPT=n</tt> kernels.
-However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels,
-so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt>
-during scheduler initialization.
-<p>@@QQE@@
-
-<p>
-I learned of these boot-time requirements as a result of a series of
-system hangs.
-
-<h3><a name="Interrupts and NMIs">Interrupts and NMIs</a></h3>
-
-<p>
-The Linux kernel has interrupts, and RCU read-side critical sections are
-legal within interrupt handlers and within interrupt-disabled regions
-of code, as are invocations of <tt>call_rcu()</tt>.
-
-<p>
-Some Linux-kernel architectures can enter an interrupt handler from
-non-idle process context, and then just never leave it, instead stealthily
-transitioning back to process context.
-This trick is sometimes used to invoke system calls from inside the kernel.
-These &ldquo;half-interrupts&rdquo; mean that RCU has to be very careful
-about how it counts interrupt nesting levels.
-I learned of this requirement the hard way during a rewrite
-of RCU's dyntick-idle code.
-
-<p>
-The Linux kernel has non-maskable interrupts (NMIs), and
-RCU read-side critical sections are legal within NMI handlers.
-Thankfully, RCU update-side primitives, including
-<tt>call_rcu()</tt>, are prohibited within NMI handlers.
-
-<p>
-The name notwithstanding, some Linux-kernel architectures
-can have nested NMIs, which RCU must handle correctly.
-Andy Lutomirski
-<a href="https://lkml.kernel.org/g/CALCETrXLq1y7e_dKFPgou-FKHB6Pu-r8+t-6Ds+8=va7anBWDA@mail.gmail.com">surprised me</a>
-with this requirement;
-he also kindly surprised me with
-<a href="https://lkml.kernel.org/g/CALCETrXSY9JpW3uE6H8WYk81sg56qasA2aqmjMPsq5dOtzso=g@mail.gmail.com">an algorithm</a>
-that meets this requirement.
-
-<h3><a name="Loadable Modules">Loadable Modules</a></h3>
-
-<p>
-The Linux kernel has loadable modules, and these modules can
-also be unloaded.
-After a given module has been unloaded, any attempt to call
-one of its functions results in a segmentation fault.
-The module-unload functions must therefore cancel any
-delayed calls to loadable-module functions, for example,
-any outstanding <tt>mod_timer()</tt> must be dealt with
-via <tt>del_timer_sync()</tt> or similar.
-
-<p>
-Unfortunately, there is no way to cancel an RCU callback;
-once you invoke <tt>call_rcu()</tt>, the callback function is
-going to eventually be invoked, unless the system goes down first.
-Because it is normally considered socially irresponsible to crash the system
-in response to a module unload request, we need some other way
-to deal with in-flight RCU callbacks.
-
-<p>
-RCU therefore provides
-<tt><a href="https://lwn.net/Articles/217484/">rcu_barrier()</a></tt>,
-which waits until all in-flight RCU callbacks have been invoked.
-If a module uses <tt>call_rcu()</tt>, its exit function should therefore
-prevent any future invocation of <tt>call_rcu()</tt>, then invoke
-<tt>rcu_barrier()</tt>.
-In theory, the underlying module-unload code could invoke
-<tt>rcu_barrier()</tt> unconditionally, but in practice this would
-incur unacceptable latencies.
-
-<p>
-Nikita Danilov noted this requirement for an analogous filesystem-unmount
-situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU.
-The need for <tt>rcu_barrier()</tt> for module unloading became
-apparent later.
-
-<h3><a name="Hotplug CPU">Hotplug CPU</a></h3>
-
-<p>
-The Linux kernel supports CPU hotplug, which means that CPUs
-can come and go.
-It is of course illegal to use any RCU API member from an offline CPU.
-This requirement was present from day one in DYNIX/ptx, but
-on the other hand, the Linux kernel's CPU-hotplug implementation
-is &ldquo;interesting.&rdquo;
-
-<p>
-The Linux-kernel CPU-hotplug implementation has notifiers that
-are used to allow the various kernel subsystems (including RCU)
-to respond appropriately to a given CPU-hotplug operation.
-Most RCU operations may be invoked from CPU-hotplug notifiers,
-including even normal synchronous grace-period operations
-such as <tt>synchronize_rcu()</tt>.
-However, expedited grace-period operations such as
-<tt>synchronize_rcu_expedited()</tt> are not supported,
-due to the fact that current implementations block CPU-hotplug
-operations, which could result in deadlock.
-
-<p>
-In addition, all-callback-wait operations such as
-<tt>rcu_barrier()</tt> are also not supported, due to the
-fact that there are phases of CPU-hotplug operations where
-the outgoing CPU's callbacks will not be invoked until after
-the CPU-hotplug operation ends, which could also result in deadlock.
-
-<h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3>
-
-<p>
-RCU depends on the scheduler, and the scheduler uses RCU to
-protect some of its data structures.
-This means the scheduler is forbidden from acquiring
-the runqueue locks and the priority-inheritance locks
-in the middle of an outermost RCU read-side critical section unless either
-(1)&nbsp;it releases them before exiting that same
-RCU read-side critical section, or
-(2)&nbsp;interrupts are disabled across
-that entire RCU read-side critical section.
-This same prohibition also applies (recursively!) to any lock that is acquired
-while holding any lock to which this prohibition applies.
-Adhering to this rule prevents preemptible RCU from invoking
-<tt>rcu_read_unlock_special()</tt> while either runqueue or
-priority-inheritance locks are held, thus avoiding deadlock.
-
-<p>
-Prior to v4.4, it was only necessary to disable preemption across
-RCU read-side critical sections that acquired scheduler locks.
-In v4.4, expedited grace periods started using IPIs, and these
-IPIs could force a <tt>rcu_read_unlock()</tt> to take the slowpath.
-Therefore, this expedited-grace-period change required disabling of
-interrupts, not just preemption.
-
-<p>
-For RCU's part, the preemptible-RCU <tt>rcu_read_unlock()</tt>
-implementation must be written carefully to avoid similar deadlocks.
-In particular, <tt>rcu_read_unlock()</tt> must tolerate an
-interrupt where the interrupt handler invokes both
-<tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
-This possibility requires <tt>rcu_read_unlock()</tt> to use
-negative nesting levels to avoid destructive recursion via
-an interrupt handler's use of RCU.
-
-<p>
-This pair of mutual scheduler-RCU requirements came as a
-<a href="https://lwn.net/Articles/453002/">complete surprise</a>.
-
-<p>
-As noted above, RCU makes use of kthreads, and it is necessary to
-avoid excessive CPU-time accumulation by these kthreads.
-This requirement was no surprise, but RCU's violation of it
-when running context-switch-heavy workloads when built with
-<tt>CONFIG_NO_HZ_FULL=y</tt>
-<a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
-RCU has made good progress towards meeting this requirement, even
-for context-switch-heavy <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
-but there is room for further improvement.
-
-<h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
-
-<p>
-It is possible to use tracing on RCU code, but tracing itself
-uses RCU.
-For this reason, <tt>rcu_dereference_raw_notrace()</tt>
-is provided for use by tracing, which avoids the destructive
-recursion that could otherwise ensue.
-This API is also used by virtualization in some architectures,
-where RCU readers execute in environments in which tracing
-cannot be used.
-The tracing folks both located the requirement and provided the
-needed fix, so this surprise requirement was relatively painless.
-
-<h3><a name="Energy Efficiency">Energy Efficiency</a></h3>
-
-<p>
-Interrupting idle CPUs is considered socially unacceptable,
-especially by people with battery-powered embedded systems.
-RCU therefore conserves energy by detecting which CPUs are
-idle, including tracking CPUs that have been interrupted from idle.
-This is a large part of the energy-efficiency requirement,
-so I learned of this via an irate phone call.
-
-<p>
-Because RCU avoids interrupting idle CPUs, it is illegal to
-execute an RCU read-side critical section on an idle CPU.
-(Kernels built with <tt>CONFIG_PROVE_RCU=y</tt> will splat
-if you try it.)
-The <tt>RCU_NONIDLE()</tt> macro and <tt>_rcuidle</tt>
-event tracing are provided to work around this restriction.
-In addition, <tt>rcu_is_watching()</tt> may be used to
-test whether or not it is currently legal to run RCU read-side
-critical sections on this CPU.
-I learned of the need for diagnostics on the one hand
-and <tt>RCU_NONIDLE()</tt> on the other while inspecting
-idle-loop code.
-Steven Rostedt supplied <tt>_rcuidle</tt> event tracing,
-which is used quite heavily in the idle loop.
-
-<p>
-It is similarly socially unacceptable to interrupt an
-<tt>nohz_full</tt> CPU running in userspace.
-RCU must therefore track <tt>nohz_full</tt> userspace
-execution.
-And in
-<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
-kernels, RCU must separately track idle CPUs on the one hand and
-CPUs that are either idle or executing in userspace on the other.
-In both cases, RCU must be able to sample state at two points in
-time, and be able to determine whether or not some other CPU spent
-any time idle and/or executing in userspace.
-
-<p>
-These energy-efficiency requirements have proven quite difficult to
-understand and to meet, for example, there have been more than five
-clean-sheet rewrites of RCU's energy-efficiency code, the last of
-which was finally able to demonstrate
-<a href="http://www.rdrop.com/users/paulmck/realtime/paper/AMPenergy.2013.04.19a.pdf">real energy savings running on real hardware [PDF]</a>.
-As noted earlier,
-I learned of many of these requirements via angry phone calls:
-Flaming me on the Linux-kernel mailing list was apparently not
-sufficient to fully vent their ire at RCU's energy-efficiency bugs!
-
-<h3><a name="Memory Efficiency">Memory Efficiency</a></h3>
-
-<p>
-Although small-memory non-realtime systems can simply use Tiny RCU,
-code size is only one aspect of memory efficiency.
-Another aspect is the size of the <tt>rcu_head</tt> structure
-used by <tt>call_rcu()</tt> and <tt>kfree_rcu()</tt>.
-Although this structure contains nothing more than a pair of pointers,
-it does appear in many RCU-protected data structures, including
-some that are size critical.
-The <tt>page</tt> structure is a case in point, as evidenced by
-the many occurrences of the <tt>union</tt> keyword within that structure.
-
-<p>
-This need for memory efficiency is one reason that RCU uses hand-crafted
-singly linked lists to track the <tt>rcu_head</tt> structures that
-are waiting for a grace period to elapse.
-It is also the reason why <tt>rcu_head</tt> structures do not contain
-debug information, such as fields tracking the file and line of the
-<tt>call_rcu()</tt> or <tt>kfree_rcu()</tt> that posted them.
-Although this information might appear in debug-only kernel builds at some
-point, in the meantime, the <tt>-&gt;func</tt> field will often provide
-the needed debug information.
-
-<p>
-However, in some cases, the need for memory efficiency leads to even
-more extreme measures.
-Returning to the <tt>page</tt> structure, the <tt>rcu_head</tt> field
-shares storage with a great many other structures that are used at
-various points in the corresponding page's lifetime.
-In order to correctly resolve certain
-<a href="https://lkml.kernel.org/g/1439976106-137226-1-git-send-email-kirill.shutemov@linux.intel.com">race conditions</a>,
-the Linux kernel's memory-management subsystem needs a particular bit
-to remain zero during all phases of grace-period processing,
-and that bit happens to map to the bottom bit of the
-<tt>rcu_head</tt> structure's <tt>-&gt;next</tt> field.
-RCU makes this guarantee as long as <tt>call_rcu()</tt>
-is used to post the callback, as opposed to <tt>kfree_rcu()</tt>
-or some future &ldquo;lazy&rdquo;
-variant of <tt>call_rcu()</tt> that might one day be created for
-energy-efficiency purposes.
-
-<h3><a name="Performance, Scalability, Response Time, and Reliability">
-Performance, Scalability, Response Time, and Reliability</a></h3>
-
-<p>
-Expanding on the
-<a href="#Performance and Scalability">earlier discussion</a>,
-RCU is used heavily by hot code paths in performance-critical
-portions of the Linux kernel's networking, security, virtualization,
-and scheduling code paths.
-RCU must therefore use efficient implementations, especially in its
-read-side primitives.
-To that end, it would be good if preemptible RCU's implementation
-of <tt>rcu_read_lock()</tt> could be inlined, however, doing
-this requires resolving <tt>#include</tt> issues with the
-<tt>task_struct</tt> structure.
-
-<p>
-The Linux kernel supports hardware configurations with up to
-4096 CPUs, which means that RCU must be extremely scalable.
-Algorithms that involve frequent acquisitions of global locks or
-frequent atomic operations on global variables simply cannot be
-tolerated within the RCU implementation.
-RCU therefore makes heavy use of a combining tree based on the
-<tt>rcu_node</tt> structure.
-RCU is required to tolerate all CPUs continuously invoking any
-combination of RCU's runtime primitives with minimal per-operation
-overhead.
-In fact, in many cases, increasing load must <i>decrease</i> the
-per-operation overhead, witness the batching optimizations for
-<tt>synchronize_rcu()</tt>, <tt>call_rcu()</tt>,
-<tt>synchronize_rcu_expedited()</tt>, and <tt>rcu_barrier()</tt>.
-As a general rule, RCU must cheerfully accept whatever the
-rest of the Linux kernel decides to throw at it.
-
-<p>
-The Linux kernel is used for real-time workloads, especially
-in conjunction with the
-<a href="https://rt.wiki.kernel.org/index.php/Main_Page">-rt patchset</a>.
-The real-time-latency response requirements are such that the
-traditional approach of disabling preemption across RCU
-read-side critical sections is inappropriate.
-Kernels built with <tt>CONFIG_PREEMPT=y</tt> therefore
-use an RCU implementation that allows RCU read-side critical
-sections to be preempted.
-This requirement made its presence known after users made it
-clear that an earlier
-<a href="https://lwn.net/Articles/107930/">real-time patch</a>
-did not meet their needs, in conjunction with some
-<a href="https://lkml.kernel.org/g/20050318002026.GA2693@us.ibm.com">RCU issues</a>
-encountered by a very early version of the -rt patchset.
-
-<p>
-In addition, RCU must make do with a sub-100-microsecond real-time latency
-budget.
-In fact, on smaller systems with the -rt patchset, the Linux kernel
-provides sub-20-microsecond real-time latencies for the whole kernel,
-including RCU.
-RCU's scalability and latency must therefore be sufficient for
-these sorts of configurations.
-To my surprise, the sub-100-microsecond real-time latency budget
-<a href="http://www.rdrop.com/users/paulmck/realtime/paper/bigrt.2013.01.31a.LCA.pdf">
-applies to even the largest systems [PDF]</a>,
-up to and including systems with 4096 CPUs.
-This real-time requirement motivated the grace-period kthread, which
-also simplified handling of a number of race conditions.
-
-<p>
-Finally, RCU's status as a synchronization primitive means that
-any RCU failure can result in arbitrary memory corruption that can be
-extremely difficult to debug.
-This means that RCU must be extremely reliable, which in
-practice also means that RCU must have an aggressive stress-test
-suite.
-This stress-test suite is called <tt>rcutorture</tt>.
-
-<p>
-Although the need for <tt>rcutorture</tt> was no surprise,
-the current immense popularity of the Linux kernel is posing
-interesting&mdash;and perhaps unprecedented&mdash;validation
-challenges.
-To see this, keep in mind that there are well over one billion
-instances of the Linux kernel running today, given Android
-smartphones, Linux-powered televisions, and servers.
-This number can be expected to increase sharply with the advent of
-the celebrated Internet of Things.
-
-<p>
-Suppose that RCU contains a race condition that manifests on average
-once per million years of runtime.
-This bug will be occurring about three times per <i>day</i> across
-the installed base.
-RCU could simply hide behind hardware error rates, given that no one
-should really expect their smartphone to last for a million years.
-However, anyone taking too much comfort from this thought should
-consider the fact that in most jurisdictions, a successful multi-year
-test of a given mechanism, which might include a Linux kernel,
-suffices for a number of types of safety-critical certifications.
-In fact, rumor has it that the Linux kernel is already being used
-in production for safety-critical applications.
-I don't know about you, but I would feel quite bad if a bug in RCU
-killed someone.
-Which might explain my recent focus on validation and verification.
-
-<h2><a name="Other RCU Flavors">Other RCU Flavors</a></h2>
-
-<p>
-One of the more surprising things about RCU is that there are now
-no fewer than five <i>flavors</i>, or API families.
-In addition, the primary flavor that has been the sole focus up to
-this point has two different implementations, non-preemptible and
-preemptible.
-The other four flavors are listed below, with requirements for each
-described in a separate section.
-
-<ol>
-<li> <a href="#Bottom-Half Flavor">Bottom-Half Flavor</a>
-<li> <a href="#Sched Flavor">Sched Flavor</a>
-<li> <a href="#Sleepable RCU">Sleepable RCU</a>
-<li> <a href="#Tasks RCU">Tasks RCU</a>
-</ol>
-
-<h3><a name="Bottom-Half Flavor">Bottom-Half Flavor</a></h3>
-
-<p>
-The softirq-disable (AKA &ldquo;bottom-half&rdquo;,
-hence the &ldquo;_bh&rdquo; abbreviations)
-flavor of RCU, or <i>RCU-bh</i>, was developed by
-Dipankar Sarma to provide a flavor of RCU that could withstand the
-network-based denial-of-service attacks researched by Robert
-Olsson.
-These attacks placed so much networking load on the system
-that some of the CPUs never exited softirq execution,
-which in turn prevented those CPUs from ever executing a context switch,
-which, in the RCU implementation of that time, prevented grace periods
-from ever ending.
-The result was an out-of-memory condition and a system hang.
-
-<p>
-The solution was the creation of RCU-bh, which does
-<tt>local_bh_disable()</tt>
-across its read-side critical sections, and which uses the transition
-from one type of softirq processing to another as a quiescent state
-in addition to context switch, idle, user mode, and offline.
-This means that RCU-bh grace periods can complete even when some of
-the CPUs execute in softirq indefinitely, thus allowing algorithms
-based on RCU-bh to withstand network-based denial-of-service attacks.
-
-<p>
-Because
-<tt>rcu_read_lock_bh()</tt> and <tt>rcu_read_unlock_bh()</tt>
-disable and re-enable softirq handlers, any attempt to start a softirq
-handler during the
-RCU-bh read-side critical section will be deferred.
-In this case, <tt>rcu_read_unlock_bh()</tt>
-will invoke softirq processing, which can take considerable time.
-One can of course argue that this softirq overhead should be associated
-with the code following the RCU-bh read-side critical section rather
-than <tt>rcu_read_unlock_bh()</tt>, but the fact
-is that most profiling tools cannot be expected to make this sort
-of fine distinction.
-For example, suppose that a three-millisecond-long RCU-bh read-side
-critical section executes during a time of heavy networking load.
-There will very likely be an attempt to invoke at least one softirq
-handler during that three milliseconds, but any such invocation will
-be delayed until the time of the <tt>rcu_read_unlock_bh()</tt>.
-This can of course make it appear at first glance as if
-<tt>rcu_read_unlock_bh()</tt> was executing very slowly.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-bh API</a>
-includes
-<tt>rcu_read_lock_bh()</tt>,
-<tt>rcu_read_unlock_bh()</tt>,
-<tt>rcu_dereference_bh()</tt>,
-<tt>rcu_dereference_bh_check()</tt>,
-<tt>synchronize_rcu_bh()</tt>,
-<tt>synchronize_rcu_bh_expedited()</tt>,
-<tt>call_rcu_bh()</tt>,
-<tt>rcu_barrier_bh()</tt>, and
-<tt>rcu_read_lock_bh_held()</tt>.
-
-<h3><a name="Sched Flavor">Sched Flavor</a></h3>
-
-<p>
-Before preemptible RCU, waiting for an RCU grace period had the
-side effect of also waiting for all pre-existing interrupt
-and NMI handlers.
-However, there are legitimate preemptible-RCU implementations that
-do not have this property, given that any point in the code outside
-of an RCU read-side critical section can be a quiescent state.
-Therefore, <i>RCU-sched</i> was created, which follows &ldquo;classic&rdquo;
-RCU in that an RCU-sched grace period waits for pre-existing
-interrupt and NMI handlers.
-In kernels built with <tt>CONFIG_PREEMPT=n</tt>, the RCU and RCU-sched
-APIs have identical implementations, while kernels built with
-<tt>CONFIG_PREEMPT=y</tt> provide a separate implementation for each.
-
-<p>
-Note well that in <tt>CONFIG_PREEMPT=y</tt> kernels,
-<tt>rcu_read_lock_sched()</tt> and <tt>rcu_read_unlock_sched()</tt>
-disable and re-enable preemption, respectively.
-This means that if there was a preemption attempt during the
-RCU-sched read-side critical section, <tt>rcu_read_unlock_sched()</tt>
-will enter the scheduler, with all the latency and overhead entailed.
-Just as with <tt>rcu_read_unlock_bh()</tt>, this can make it look
-as if <tt>rcu_read_unlock_sched()</tt> was executing very slowly.
-However, the highest-priority task won't be preempted, so that task
-will enjoy low-overhead <tt>rcu_read_unlock_sched()</tt> invocations.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">RCU-sched API</a>
-includes
-<tt>rcu_read_lock_sched()</tt>,
-<tt>rcu_read_unlock_sched()</tt>,
-<tt>rcu_read_lock_sched_notrace()</tt>,
-<tt>rcu_read_unlock_sched_notrace()</tt>,
-<tt>rcu_dereference_sched()</tt>,
-<tt>rcu_dereference_sched_check()</tt>,
-<tt>synchronize_sched()</tt>,
-<tt>synchronize_rcu_sched_expedited()</tt>,
-<tt>call_rcu_sched()</tt>,
-<tt>rcu_barrier_sched()</tt>, and
-<tt>rcu_read_lock_sched_held()</tt>.
-However, anything that disables preemption also marks an RCU-sched
-read-side critical section, including
-<tt>preempt_disable()</tt> and <tt>preempt_enable()</tt>,
-<tt>local_irq_save()</tt> and <tt>local_irq_restore()</tt>,
-and so on.
-
-<h3><a name="Sleepable RCU">Sleepable RCU</a></h3>
-
-<p>
-For well over a decade, someone saying &ldquo;I need to block within
-an RCU read-side critical section&rdquo; was a reliable indication
-that this someone did not understand RCU.
-After all, if you are always blocking in an RCU read-side critical
-section, you can probably afford to use a higher-overhead synchronization
-mechanism.
-However, that changed with the advent of the Linux kernel's notifiers,
-whose RCU read-side critical
-sections almost never sleep, but sometimes need to.
-This resulted in the introduction of
-<a href="https://lwn.net/Articles/202847/">sleepable RCU</a>,
-or <i>SRCU</i>.
-
-<p>
-SRCU allows different domains to be defined, with each such domain
-defined by an instance of an <tt>srcu_struct</tt> structure.
-A pointer to this structure must be passed in to each SRCU function,
-for example, <tt>synchronize_srcu(&amp;ss)</tt>, where
-<tt>ss</tt> is the <tt>srcu_struct</tt> structure.
-The key benefit of these domains is that a slow SRCU reader in one
-domain does not delay an SRCU grace period in some other domain.
-That said, one consequence of these domains is that read-side code
-must pass a &ldquo;cookie&rdquo; from <tt>srcu_read_lock()</tt>
-to <tt>srcu_read_unlock()</tt>, for example, as follows:
-
-<blockquote>
-<pre>
- 1 int idx;
- 2
- 3 idx = srcu_read_lock(&amp;ss);
- 4 do_something();
- 5 srcu_read_unlock(&amp;ss, idx);
-</pre>
-</blockquote>
-
-<p>
-As noted above, it is legal to block within SRCU read-side critical sections,
-however, with great power comes great responsibility.
-If you block forever in one of a given domain's SRCU read-side critical
-sections, then that domain's grace periods will also be blocked forever.
-Of course, one good way to block forever is to deadlock, which can
-happen if any operation in a given domain's SRCU read-side critical
-section can block waiting, either directly or indirectly, for that domain's
-grace period to elapse.
-For example, this results in a self-deadlock:
-
-<blockquote>
-<pre>
- 1 int idx;
- 2
- 3 idx = srcu_read_lock(&amp;ss);
- 4 do_something();
- 5 synchronize_srcu(&amp;ss);
- 6 srcu_read_unlock(&amp;ss, idx);
-</pre>
-</blockquote>
-
-<p>
-However, if line&nbsp;5 acquired a mutex that was held across
-a <tt>synchronize_srcu()</tt> for domain <tt>ss</tt>,
-deadlock would still be possible.
-Furthermore, if line&nbsp;5 acquired a mutex that was held across
-a <tt>synchronize_srcu()</tt> for some other domain <tt>ss1</tt>,
-and if an <tt>ss1</tt>-domain SRCU read-side critical section
-acquired another mutex that was held across an <tt>ss</tt>-domain
-<tt>synchronize_srcu()</tt>,
-deadlock would again be possible.
-Such a deadlock cycle could extend across an arbitrarily large number
-of different SRCU domains.
-Again, with great power comes great responsibility.
-
-<p>
-Unlike the other RCU flavors, SRCU read-side critical sections can
-run on idle and even offline CPUs.
-This ability requires that <tt>srcu_read_lock()</tt> and
-<tt>srcu_read_unlock()</tt> contain memory barriers, which means
-that SRCU readers will run a bit slower than would RCU readers.
-It also motivates the <tt>smp_mb__after_srcu_read_unlock()</tt>
-API, which, in combination with <tt>srcu_read_unlock()</tt>,
-guarantees a full memory barrier.
-
-<p>
-The
-<a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a>
-includes
-<tt>srcu_read_lock()</tt>,
-<tt>srcu_read_unlock()</tt>,
-<tt>srcu_dereference()</tt>,
-<tt>srcu_dereference_check()</tt>,
-<tt>synchronize_srcu()</tt>,
-<tt>synchronize_srcu_expedited()</tt>,
-<tt>call_srcu()</tt>,
-<tt>srcu_barrier()</tt>, and
-<tt>srcu_read_lock_held()</tt>.
-It also includes
-<tt>DEFINE_SRCU()</tt>,
-<tt>DEFINE_STATIC_SRCU()</tt>, and
-<tt>init_srcu_struct()</tt>
-APIs for defining and initializing <tt>srcu_struct</tt> structures.
-
-<h3><a name="Tasks RCU">Tasks RCU</a></h3>
-
-<p>
-Some forms of tracing use &ldquo;trampolines&rdquo; to handle the
-binary rewriting required to install different types of probes.
-It would be good to be able to free old trampolines, which sounds
-like a job for some form of RCU.
-However, because it is necessary to be able to install a trace
-anywhere in the code, it is not possible to use read-side markers
-such as <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
-In addition, it does not work to have these markers in the trampoline
-itself, because there would need to be instructions following
-<tt>rcu_read_unlock()</tt>.
-Although <tt>synchronize_rcu()</tt> would guarantee that execution
-reached the <tt>rcu_read_unlock()</tt>, it would not be able to
-guarantee that execution had completely left the trampoline.
-
-<p>
-The solution, in the form of
-<a href="https://lwn.net/Articles/607117/"><i>Tasks RCU</i></a>,
-is to have implicit
-read-side critical sections that are delimited by voluntary context
-switches, that is, calls to <tt>schedule()</tt>,
-<tt>cond_resched_rcu_qs()</tt>, and
-<tt>synchronize_rcu_tasks()</tt>.
-In addition, transitions to and from userspace execution also delimit
-tasks-RCU read-side critical sections.
-
-<p>
-The tasks-RCU API is quite compact, consisting only of
-<tt>call_rcu_tasks()</tt>,
-<tt>synchronize_rcu_tasks()</tt>, and
-<tt>rcu_barrier_tasks()</tt>.
-
-<h2><a name="Possible Future Changes">Possible Future Changes</a></h2>
-
-<p>
-One of the tricks that RCU uses to attain update-side scalability is
-to increase grace-period latency with increasing numbers of CPUs.
-If this becomes a serious problem, it will be necessary to rework the
-grace-period state machine so as to avoid the need for the additional
-latency.
-
-<p>
-Expedited grace periods scan the CPUs, so their latency and overhead
-increases with increasing numbers of CPUs.
-If this becomes a serious problem on large systems, it will be necessary
-to do some redesign to avoid this scalability problem.
-
-<p>
-RCU disables CPU hotplug in a few places, perhaps most notably in the
-expedited grace-period and <tt>rcu_barrier()</tt> operations.
-If there is a strong reason to use expedited grace periods in CPU-hotplug
-notifiers, it will be necessary to avoid disabling CPU hotplug.
-This would introduce some complexity, so there had better be a <i>very</i>
-good reason.
-
-<p>
-The tradeoff between grace-period latency on the one hand and interruptions
-of other CPUs on the other hand may need to be re-examined.
-The desire is of course for zero grace-period latency as well as zero
-interprocessor interrupts undertaken during an expedited grace period
-operation.
-While this ideal is unlikely to be achievable, it is quite possible that
-further improvements can be made.
-
-<p>
-The multiprocessor implementations of RCU use a combining tree that
-groups CPUs so as to reduce lock contention and increase cache locality.
-However, this combining tree does not spread its memory across NUMA
-nodes nor does it align the CPU groups with hardware features such
-as sockets or cores.
-Such spreading and alignment is currently believed to be unnecessary
-because the hotpath read-side primitives do not access the combining
-tree, nor does <tt>call_rcu()</tt> in the common case.
-If you believe that your architecture needs such spreading and alignment,
-then your architecture should also benefit from the
-<tt>rcutree.rcu_fanout_leaf</tt> boot parameter, which can be set
-to the number of CPUs in a socket, NUMA node, or whatever.
-If the number of CPUs is too large, use a fraction of the number of
-CPUs.
-If the number of CPUs is a large prime number, well, that certainly
-is an &ldquo;interesting&rdquo; architectural choice!
-More flexible arrangements might be considered, but only if
-<tt>rcutree.rcu_fanout_leaf</tt> has proven inadequate, and only
-if the inadequacy has been demonstrated by a carefully run and
-realistic system-level workload.
-
-<p>
-Please note that arrangements that require RCU to remap CPU numbers will
-require extremely good demonstration of need and full exploration of
-alternatives.
-
-<p>
-There is an embarrassingly large number of flavors of RCU, and this
-number has been increasing over time.
-Perhaps it will be possible to combine some at some future date.
-
-<p>
-RCU's various kthreads are reasonably recent additions.
-It is quite likely that adjustments will be required to more gracefully
-handle extreme loads.
-It might also be necessary to be able to relate CPU utilization by
-RCU's kthreads and softirq handlers to the code that instigated this
-CPU utilization.
-For example, RCU callback overhead might be charged back to the
-originating <tt>call_rcu()</tt> instance, though probably not
-in production kernels.
-
-<h2><a name="Summary">Summary</a></h2>
-
-<p>
-This document has presented more than two decades' worth of RCU
-requirements.
-Given that the requirements keep changing, this will not be the last
-word on this subject, but at least it serves to get an important
-subset of the requirements set forth.
-
-<h2><a name="Acknowledgments">Acknowledgments</a></h2>
-
-I am grateful to Steven Rostedt, Lai Jiangshan, Ingo Molnar,
-Oleg Nesterov, Borislav Petkov, Peter Zijlstra, Boqun Feng, and
-Andy Lutomirski for their help in rendering
-this article human readable, and to Michelle Rankin for her support
-of this effort.
-Other contributions are acknowledged in the Linux kernel's git archive.
-The cartoon is copyright (c) 2013 by Melissa Broussard,
-and is provided
-under the terms of the Creative Commons Attribution-Share Alike 3.0
-United States license.
-
-<p>@@QQAL@@
-
-</body></html>
diff --git a/Documentation/RCU/Design/htmlqqz.sh b/Documentation/RCU/Design/htmlqqz.sh
deleted file mode 100755
index d354f069559b..000000000000
--- a/Documentation/RCU/Design/htmlqqz.sh
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/bin/sh
-#
-# Usage: sh htmlqqz.sh file
-#
-# Extracts and converts quick quizzes in a proto-HTML document file.htmlx.
-# Commands, all of which must be on a line by themselves:
-#
-# "<p>@@QQ@@": Start of a quick quiz.
-# "<p>@@QQA@@": Start of a quick-quiz answer.
-# "<p>@@QQE@@": End of a quick-quiz answer, and thus of the quick quiz.
-# "<p>@@QQAL@@": Place to put quick-quiz answer list.
-#
-# Places the result in file.html.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
-# Copyright (c) 2013 Paul E. McKenney, IBM Corporation.
-
-fn=$1
-if test ! -r $fn.htmlx
-then
- echo "Error: $fn.htmlx unreadable."
- exit 1
-fi
-
-echo "<!-- DO NOT HAND EDIT. -->" > $fn.html
-echo "<!-- Instead, edit $fn.htmlx and run 'sh htmlqqz.sh $fn' -->" >> $fn.html
-awk < $fn.htmlx >> $fn.html '
-
-state == "" && $1 != "<p>@@QQ@@" && $1 != "<p>@@QQAL@@" {
- print $0;
- if ($0 ~ /^<p>@@QQ/)
- print "Bad Quick Quiz command: " NR " (expected <p>@@QQ@@ or <p>@@QQAL@@)." > "/dev/stderr"
- next;
-}
-
-state == "" && $1 == "<p>@@QQ@@" {
- qqn++;
- qqlineno = NR;
- haveqq = 1;
- state = "qq";
- print "<p><a name=\"Quick Quiz " qqn "\"><b>Quick Quiz " qqn "</b>:</a>"
- next;
-}
-
-state == "qq" && $1 != "<p>@@QQA@@" {
- qq[qqn] = qq[qqn] $0 "\n";
- print $0
- if ($0 ~ /^<p>@@QQ/)
- print "Bad Quick Quiz command: " NR ". (expected <p>@@QQA@@)" > "/dev/stderr"
- next;
-}
-
-state == "qq" && $1 == "<p>@@QQA@@" {
- state = "qqa";
- print "<br><a href=\"#qq" qqn "answer\">Answer</a>"
- next;
-}
-
-state == "qqa" && $1 != "<p>@@QQE@@" {
- qqa[qqn] = qqa[qqn] $0 "\n";
- if ($0 ~ /^<p>@@QQ/)
- print "Bad Quick Quiz command: " NR " (expected <p>@@QQE@@)." > "/dev/stderr"
- next;
-}
-
-state == "qqa" && $1 == "<p>@@QQE@@" {
- state = "";
- next;
-}
-
-state == "" && $1 == "<p>@@QQAL@@" {
- haveqq = "";
- print "<h3><a name=\"Answers to Quick Quizzes\">"
- print "Answers to Quick Quizzes</a></h3>"
- print "";
- for (i = 1; i <= qqn; i++) {
- print "<a name=\"qq" i "answer\"></a>"
- print "<p><b>Quick Quiz " i "</b>:"
- print qq[i];
- print "";
- print "</p><p><b>Answer</b>:"
- print qqa[i];
- print "";
- print "</p><p><a href=\"#Quick%20Quiz%20" i "\"><b>Back to Quick Quiz " i "</b>.</a>"
- print "";
- }
- next;
-}
-
-END {
- if (state != "")
- print "Unterminated Quick Quiz: " qqlineno "." > "/dev/stderr"
- else if (haveqq)
- print "Missing \"<p>@@QQAL@@\", no Quick Quiz." > "/dev/stderr"
-}'
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index ec6998b1b6d0..00a3a38b375a 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -237,17 +237,17 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
-s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
+s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
These fields are as follows:
o "s" is the sequence number, with an odd number indicating that
an expedited grace period is in progress.
-o "wd0", "wd1", "wd2", and "wd3" are the number of times that an
- attempt to start an expedited grace period found that someone
- else had completed an expedited grace period that satisfies the
- attempted request. "Our work is done."
+o "wd1", "wd2", and "wd3" are the number of times that an attempt
+ to start an expedited grace period found that someone else had
+ completed an expedited grace period that satisfies the attempted
+ request. "Our work is done."
o "n" is number of times that a concurrent CPU-hotplug operation
forced a fallback to a normal grace period.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index dc49c6712b17..111770ffa10e 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -681,22 +681,30 @@ Although RCU can be used in many different ways, a very common use of
RCU is analogous to reader-writer locking. The following unified
diff shows how closely related RCU and reader-writer locking can be.
+ @@ -5,5 +5,5 @@ struct el {
+ int data;
+ /* Other data fields */
+ };
+ -rwlock_t listmutex;
+ +spinlock_t listmutex;
+ struct el head;
+
@@ -13,15 +14,15 @@
struct list_head *lp;
struct el *p;
- - read_lock();
+ - read_lock(&listmutex);
- list_for_each_entry(p, head, lp) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(p, head, lp) {
if (p->key == key) {
*result = p->data;
- - read_unlock();
+ - read_unlock(&listmutex);
+ rcu_read_unlock();
return 1;
}
}
- - read_unlock();
+ - read_unlock(&listmutex);
+ rcu_read_unlock();
return 0;
}
@@ -732,7 +740,7 @@ Or, for those who prefer a side-by-side listing:
5 int data; 5 int data;
6 /* Other data fields */ 6 /* Other data fields */
7 }; 7 };
- 8 spinlock_t listmutex; 8 spinlock_t listmutex;
+ 8 rwlock_t listmutex; 8 spinlock_t listmutex;
9 struct el head; 9 struct el head;
1 int search(long key, int *result) 1 int search(long key, int *result)
@@ -740,15 +748,15 @@ Or, for those who prefer a side-by-side listing:
3 struct list_head *lp; 3 struct list_head *lp;
4 struct el *p; 4 struct el *p;
5 5
- 6 read_lock(); 6 rcu_read_lock();
+ 6 read_lock(&listmutex); 6 rcu_read_lock();
7 list_for_each_entry(p, head, lp) { 7 list_for_each_entry_rcu(p, head, lp) {
8 if (p->key == key) { 8 if (p->key == key) {
9 *result = p->data; 9 *result = p->data;
-10 read_unlock(); 10 rcu_read_unlock();
+10 read_unlock(&listmutex); 10 rcu_read_unlock();
11 return 1; 11 return 1;
12 } 12 }
13 } 13 }
-14 read_unlock(); 14 rcu_read_unlock();
+14 read_unlock(&listmutex); 14 rcu_read_unlock();
15 return 0; 15 return 0;
16 } 16 }
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index 7785fb5eb93f..b5ca536e56a8 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -505,6 +505,8 @@ int main(int argc, char *argv[])
if (!loop)
goto done;
break;
+ case TASKSTATS_TYPE_NULL:
+ break;
default:
fprintf(stderr, "Unknown nested"
" nla_type %d\n",
@@ -512,7 +514,8 @@ int main(int argc, char *argv[])
break;
}
len2 += NLA_ALIGN(na->nla_len);
- na = (struct nlattr *) ((char *) na + len2);
+ na = (struct nlattr *)((char *)na +
+ NLA_ALIGN(na->nla_len));
}
break;
diff --git a/Documentation/acpi/initrd_table_override.txt b/Documentation/acpi/initrd_table_override.txt
index 35c3f5415476..eb651a6aa285 100644
--- a/Documentation/acpi/initrd_table_override.txt
+++ b/Documentation/acpi/initrd_table_override.txt
@@ -1,5 +1,5 @@
-Overriding ACPI tables via initrd
-=================================
+Upgrading ACPI tables via initrd
+================================
1) Introduction (What is this about)
2) What is this for
@@ -9,12 +9,14 @@ Overriding ACPI tables via initrd
1) What is this about
---------------------
-If the ACPI_INITRD_TABLE_OVERRIDE compile option is true, it is possible to
-override nearly any ACPI table provided by the BIOS with an instrumented,
-modified one.
+If the ACPI_TABLE_UPGRADE compile option is true, it is possible to
+upgrade the ACPI execution environment that is defined by the ACPI tables
+via upgrading the ACPI tables provided by the BIOS with an instrumented,
+modified, more recent version, or installing brand new ACPI tables.
-For a full list of ACPI tables that can be overridden, take a look at
-the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in drivers/acpi/osl.c
+For a full list of ACPI tables that can be upgraded/installed, take a look
+at the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in
+drivers/acpi/tables.c.
All ACPI tables iasl (Intel's ACPI compiler and disassembler) knows should
be overridable, except:
- ACPI_SIG_RSDP (has a signature of 6 bytes)
@@ -25,17 +27,20 @@ Both could get implemented as well.
2) What is this for
-------------------
-Please keep in mind that this is a debug option.
-ACPI tables should not get overridden for productive use.
-If BIOS ACPI tables are overridden the kernel will get tainted with the
-TAINT_OVERRIDDEN_ACPI_TABLE flag.
-Complain to your platform/BIOS vendor if you find a bug which is so sever
-that a workaround is not accepted in the Linux kernel.
+Complain to your platform/BIOS vendor if you find a bug which is so severe
+that a workaround is not accepted in the Linux kernel. And this facility
+allows you to upgrade the buggy tables before your platform/BIOS vendor
+releases an upgraded BIOS binary.
-Still, it can and should be enabled in any kernel, because:
- - There is no functional change with not instrumented initrds
- - It provides a powerful feature to easily debug and test ACPI BIOS table
- compatibility with the Linux kernel.
+This facility can be used by platform/BIOS vendors to provide a Linux
+compatible environment without modifying the underlying platform firmware.
+
+This facility also provides a powerful feature to easily debug and test
+ACPI BIOS table compatibility with the Linux kernel by modifying old
+platform provided ACPI tables or inserting new ACPI tables.
+
+It can and should be enabled in any kernel because there is no functional
+change with not instrumented initrds.
3) How does it work
@@ -50,23 +55,31 @@ iasl -d *.dat
# For example add this statement into a _PRT (PCI Routing Table) function
# of the DSDT:
Store("HELLO WORLD", debug)
+# And increase the OEM Revision. For example, before modification:
+DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000000)
+# After modification:
+DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000001)
iasl -sa dsdt.dsl
# Add the raw ACPI tables to an uncompressed cpio archive.
-# They must be put into a /kernel/firmware/acpi directory inside the
-# cpio archive.
-# The uncompressed cpio archive must be the first.
-# Other, typically compressed cpio archives, must be
-# concatenated on top of the uncompressed one.
+# They must be put into a /kernel/firmware/acpi directory inside the cpio
+# archive. Note that if the table put here matches a platform table
+# (similar Table Signature, and similar OEMID, and similar OEM Table ID)
+# with a more recent OEM Revision, the platform table will be upgraded by
+# this table. If the table put here doesn't match a platform table
+# (dissimilar Table Signature, or dissimilar OEMID, or dissimilar OEM Table
+# ID), this table will be appended.
mkdir -p kernel/firmware/acpi
cp dsdt.aml kernel/firmware/acpi
-# A maximum of: #define ACPI_OVERRIDE_TABLES 10
-# tables are currently allowed (see osl.c):
+# A maximum of "NR_ACPI_INITRD_TABLES (64)" tables are currently allowed
+# (see tables.c):
iasl -sa facp.dsl
iasl -sa ssdt1.dsl
cp facp.aml kernel/firmware/acpi
cp ssdt1.aml kernel/firmware/acpi
-# Create the uncompressed cpio archive and concatenate the original initrd
-# on top:
+# The uncompressed cpio archive must be the first. Other, typically
+# compressed cpio archives, must be concatenated on top of the uncompressed
+# one. Following command creates the uncompressed cpio archive and
+# concatenates the original initrd on top:
find kernel | cpio -H newc --create > /boot/instrumented_initrd
cat /boot/initrd >>/boot/instrumented_initrd
# reboot with increased acpi debug level, e.g. boot params:
diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
index 56d6d8b796db..8d0df62c3fe0 100644
--- a/Documentation/arm64/booting.txt
+++ b/Documentation/arm64/booting.txt
@@ -132,6 +132,10 @@ NOTE: versions prior to v4.6 cannot make use of memory below the
physical offset of the Image so it is recommended that the Image be
placed as close as possible to the start of system RAM.
+If an initrd/initramfs is passed to the kernel at boot, it must reside
+entirely within a 1 GB aligned physical memory window of up to 32 GB in
+size that fully covers the kernel Image as well.
+
Any memory described to the kernel (even that below the start of the
image) which is not marked as reserved from the kernel (e.g., with a
memreserve region in the device tree) will be considered as available to
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index ba4b6acfc545..c6938e50e71f 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -53,7 +53,9 @@ stable kernels.
| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 |
| ARM | Cortex-A57 | #852523 | N/A |
| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 |
+| ARM | MMU-500 | #841119,#826419 | N/A |
| | | | |
| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 |
| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 |
| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 |
+| Cavium | ThunderX SMMUv2 | #27704 | N/A |
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index e5d914845be6..dce25d848d92 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -141,6 +141,15 @@ control of this block device to that new IO scheduler. Note that writing
an IO scheduler name to this file will attempt to load that IO scheduler
module, if it isn't already present in the system.
+write_cache (RW)
+----------------
+When read, this file will display whether the device has write back
+caching enabled or not. It will return "write back" for the former
+case, and "write through" for the latter. Writing to this file can
+change the kernel's view of the device, but it doesn't alter the
+device state. This means that it might not be safe to toggle the
+setting from "write back" to "write through", since that will also
+eliminate cache flushes issued by the kernel.
Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/Documentation/block/writeback_cache_control.txt b/Documentation/block/writeback_cache_control.txt
index 83407d36630a..59e0516cbf6b 100644
--- a/Documentation/block/writeback_cache_control.txt
+++ b/Documentation/block/writeback_cache_control.txt
@@ -71,7 +71,7 @@ requests that have a payload. For devices with volatile write caches the
driver needs to tell the block layer that it supports flushing caches by
doing:
- blk_queue_flush(sdkp->disk->queue, REQ_FLUSH);
+ blk_queue_write_cache(sdkp->disk->queue, true, false);
and handle empty REQ_FLUSH requests in its prep_fn/request_fn. Note that
REQ_FLUSH requests with a payload are automatically turned into a sequence
@@ -79,7 +79,7 @@ of an empty REQ_FLUSH request followed by the actual write by the block
layer. For devices that also support the FUA bit the block layer needs
to be told to pass through the REQ_FUA bit using:
- blk_queue_flush(sdkp->disk->queue, REQ_FLUSH | REQ_FUA);
+ blk_queue_write_cache(sdkp->disk->queue, true, true);
and the driver must handle write requests that have the REQ_FUA bit set
in prep_fn/request_fn. If the FUA bit is not natively supported the block
diff --git a/Documentation/device-mapper/cache-policies.txt b/Documentation/device-mapper/cache-policies.txt
index e5062ad18717..d3ca8af21a31 100644
--- a/Documentation/device-mapper/cache-policies.txt
+++ b/Documentation/device-mapper/cache-policies.txt
@@ -11,7 +11,7 @@ Every bio that is mapped by the target is referred to the policy.
The policy can return a simple HIT or MISS or issue a migration.
Currently there's no way for the policy to issue background work,
-e.g. to start writing back dirty blocks that are going to be evicte
+e.g. to start writing back dirty blocks that are going to be evicted
soon.
Because we map bios, rather than requests it's easy for the policy
@@ -48,7 +48,7 @@ with the multiqueue (mq) policy.
The smq policy (vs mq) offers the promise of less memory utilization,
improved performance and increased adaptability in the face of changing
-workloads. SMQ also does not have any cumbersome tuning knobs.
+workloads. smq also does not have any cumbersome tuning knobs.
Users may switch from "mq" to "smq" simply by appropriately reloading a
DM table that is using the cache target. Doing so will cause all of the
@@ -57,47 +57,45 @@ degrade slightly until smq recalculates the origin device's hotspots
that should be cached.
Memory usage:
-The mq policy uses a lot of memory; 88 bytes per cache block on a 64
+The mq policy used a lot of memory; 88 bytes per cache block on a 64
bit machine.
-SMQ uses 28bit indexes to implement it's data structures rather than
+smq uses 28bit indexes to implement its data structures rather than
pointers. It avoids storing an explicit hit count for each block. It
-has a 'hotspot' queue rather than a pre cache which uses a quarter of
+has a 'hotspot' queue, rather than a pre-cache, which uses a quarter of
the entries (each hotspot block covers a larger area than a single
cache block).
-All these mean smq uses ~25bytes per cache block. Still a lot of
+All this means smq uses ~25bytes per cache block. Still a lot of
memory, but a substantial improvement nonetheless.
Level balancing:
-MQ places entries in different levels of the multiqueue structures
-based on their hit count (~ln(hit count)). This means the bottom
-levels generally have the most entries, and the top ones have very
-few. Having unbalanced levels like this reduces the efficacy of the
+mq placed entries in different levels of the multiqueue structures
+based on their hit count (~ln(hit count)). This meant the bottom
+levels generally had the most entries, and the top ones had very
+few. Having unbalanced levels like this reduced the efficacy of the
multiqueue.
-SMQ does not maintain a hit count, instead it swaps hit entries with
-the least recently used entry from the level above. The over all
+smq does not maintain a hit count, instead it swaps hit entries with
+the least recently used entry from the level above. The overall
ordering being a side effect of this stochastic process. With this
scheme we can decide how many entries occupy each multiqueue level,
resulting in better promotion/demotion decisions.
Adaptability:
-The MQ policy maintains a hit count for each cache block. For a
+The mq policy maintained a hit count for each cache block. For a
different block to get promoted to the cache its hit count has to
-exceed the lowest currently in the cache. This means it can take a
+exceed the lowest currently in the cache. This meant it could take a
long time for the cache to adapt between varying IO patterns.
-Periodically degrading the hit counts could help with this, but I
-haven't found a nice general solution.
-SMQ doesn't maintain hit counts, so a lot of this problem just goes
+smq doesn't maintain hit counts, so a lot of this problem just goes
away. In addition it tracks performance of the hotspot queue, which
is used to decide which blocks to promote. If the hotspot queue is
performing badly then it starts moving entries more quickly between
levels. This lets it adapt to new IO patterns very quickly.
Performance:
-Testing SMQ shows substantially better performance than MQ.
+Testing smq shows substantially better performance than mq.
cleaner
-------
diff --git a/Documentation/device-mapper/statistics.txt b/Documentation/device-mapper/statistics.txt
index 6f5ef944ca4c..170ac02a1f50 100644
--- a/Documentation/device-mapper/statistics.txt
+++ b/Documentation/device-mapper/statistics.txt
@@ -205,7 +205,7 @@ statistics on them:
dmsetup message vol 0 @stats_create - /100
-Set the auxillary data string to "foo bar baz" (the escape for each
+Set the auxiliary data string to "foo bar baz" (the escape for each
space must also be escaped, otherwise the shell will consume them):
dmsetup message vol 0 @stats_set_aux 0 foo\\ bar\\ baz
diff --git a/Documentation/devicetree/bindings/arc/archs-pct.txt b/Documentation/devicetree/bindings/arc/archs-pct.txt
index 1ae98b87c640..e4b9dcee6d41 100644
--- a/Documentation/devicetree/bindings/arc/archs-pct.txt
+++ b/Documentation/devicetree/bindings/arc/archs-pct.txt
@@ -2,7 +2,7 @@
The ARC HS can be configured with a pipeline performance monitor for counting
CPU and cache events like cache misses and hits. Like conventional PCT there
-are 100+ hardware conditions dynamically mapped to upto 32 counters.
+are 100+ hardware conditions dynamically mapped to up to 32 counters.
It also supports overflow interrupts.
Required properties:
diff --git a/Documentation/devicetree/bindings/arc/eznps.txt b/Documentation/devicetree/bindings/arc/eznps.txt
new file mode 100644
index 000000000000..1aa50c640678
--- /dev/null
+++ b/Documentation/devicetree/bindings/arc/eznps.txt
@@ -0,0 +1,7 @@
+EZchip NPS Network Processor Platforms Device Tree Bindings
+---------------------------------------------------------------------------
+
+Appliance main board with NPS400 ASIC.
+
+Required root node properties:
+ - compatible = "ezchip,arc-nps";
diff --git a/Documentation/devicetree/bindings/arc/pct.txt b/Documentation/devicetree/bindings/arc/pct.txt
index 7b9588444f20..4e874d9a38a6 100644
--- a/Documentation/devicetree/bindings/arc/pct.txt
+++ b/Documentation/devicetree/bindings/arc/pct.txt
@@ -2,7 +2,7 @@
The ARC700 can be configured with a pipeline performance monitor for counting
CPU and cache events like cache misses and hits. Like conventional PCT there
-are 100+ hardware conditions dynamically mapped to upto 32 counters
+are 100+ hardware conditions dynamically mapped to up to 32 counters
Note that:
* The ARC 700 PCT does not support interrupts; although HW events may be
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
index 885f93d14ef9..5a6b16070a33 100644
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
@@ -3,6 +3,7 @@ This driver uses the EDAC framework to implement the SOCFPGA ECC Manager.
The ECC Manager counts and corrects single bit errors and counts/handles
double bit errors which are uncorrectable.
+Cyclone5 and Arria5 ECC Manager
Required Properties:
- compatible : Should be "altr,socfpga-ecc-manager"
- #address-cells: must be 1
@@ -47,3 +48,52 @@ Example:
interrupts = <0 178 1>, <0 179 1>;
};
};
+
+Arria10 SoCFPGA ECC Manager
+The Arria10 SoC ECC Manager handles the IRQs for each peripheral
+in a shared register instead of individual IRQs like the Cyclone5
+and Arria5. Therefore the device tree is different as well.
+
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-ecc-manager"
+- altr,sysmgr-syscon : phandle to Arria10 System Manager Block
+ containing the ECC manager registers.
+- #address-cells: must be 1
+- #size-cells: must be 1
+- interrupts : Should be single bit error interrupt, then double bit error
+ interrupt. Note the rising edge type.
+- ranges : standard definition, should translate from local addresses
+
+Subcomponents:
+
+L2 Cache ECC
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-l2-ecc"
+- reg : Address and size for ECC error interrupt clear registers.
+
+On-Chip RAM ECC
+Required Properties:
+- compatible : Should be "altr,socfpga-a10-ocram-ecc"
+- reg : Address and size for ECC block registers.
+
+Example:
+
+ eccmgr: eccmgr@ffd06000 {
+ compatible = "altr,socfpga-a10-ecc-manager";
+ altr,sysmgr-syscon = <&sysmgr>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupts = <0 2 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 IRQ_TYPE_LEVEL_HIGH>;
+ ranges;
+
+ l2-ecc@ffd06010 {
+ compatible = "altr,socfpga-a10-l2-ecc";
+ reg = <0xffd06010 0x4>;
+ };
+
+ ocram-ecc@ff8c3000 {
+ compatible = "altr,socfpga-a10-ocram-ecc";
+ reg = <0xff8c3000 0x90>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/amlogic.txt b/Documentation/devicetree/bindings/arm/amlogic.txt
index 8a5122ab19b0..fcc6f6c10803 100644
--- a/Documentation/devicetree/bindings/arm/amlogic.txt
+++ b/Documentation/devicetree/bindings/arm/amlogic.txt
@@ -25,3 +25,6 @@ Board compatible values:
- "tronsmart,vega-s95-pro", "tronsmart,vega-s95" (Meson gxbb)
- "tronsmart,vega-s95-meta", "tronsmart,vega-s95" (Meson gxbb)
- "tronsmart,vega-s95-telos", "tronsmart,vega-s95" (Meson gxbb)
+ - "hardkernel,odroid-c2" (Meson gxbb)
+ - "amlogic,p200" (Meson gxbb)
+ - "amlogic,p201" (Meson gxbb)
diff --git a/Documentation/devicetree/bindings/arm/arm-boards b/Documentation/devicetree/bindings/arm/arm-boards
index 0226bc2cc1f6..ab318a56fca2 100644
--- a/Documentation/devicetree/bindings/arm/arm-boards
+++ b/Documentation/devicetree/bindings/arm/arm-boards
@@ -93,6 +93,14 @@ Required nodes:
a core-module with regs and the compatible strings
"arm,core-module-versatile", "syscon"
+Optional nodes:
+
+- arm,versatile-ib2-syscon : if the Versatile has an IB2 interface
+ board mounted, this has a separate system controller that is
+ defined in this node.
+ Required properties:
+ compatible = "arm,versatile-ib2-syscon", "syscon"
+
ARM RealView Boards
-------------------
The RealView boards cover tailored evaluation boards that are used to explore
diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.txt b/Documentation/devicetree/bindings/arm/atmel-at91.txt
index 7fd64ec9ee1d..1d8004633479 100644
--- a/Documentation/devicetree/bindings/arm/atmel-at91.txt
+++ b/Documentation/devicetree/bindings/arm/atmel-at91.txt
@@ -41,6 +41,10 @@ compatible: must be one of:
- "atmel,sama5d43"
- "atmel,sama5d44"
+Chipid required properties:
+- compatible: Should be "atmel,sama5d2-chipid"
+- reg : Should contain registers location and length
+
PIT Timer required properties:
- compatible: Should be "atmel,at91sam9260-pit"
- reg: Should contain registers location and length
@@ -155,7 +159,7 @@ elsewhere.
required properties:
- compatible: Should be "atmel,<chip>-sfr", "syscon".
- <chip> can be "sama5d3" or "sama5d4".
+ <chip> can be "sama5d3", "sama5d4" or "sama5d2".
- reg: Should contain registers location and length
sfr@f0038000 {
diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt
index ccc62f145306..3f0cbbb8395f 100644
--- a/Documentation/devicetree/bindings/arm/cpus.txt
+++ b/Documentation/devicetree/bindings/arm/cpus.txt
@@ -192,7 +192,6 @@ nodes to be present and contain the properties described below.
can be one of:
"allwinner,sun6i-a31"
"allwinner,sun8i-a23"
- "arm,psci"
"arm,realview-smp"
"brcm,bcm-nsp-smp"
"brcm,brahma-b15"
diff --git a/Documentation/devicetree/bindings/arm/fsl.txt b/Documentation/devicetree/bindings/arm/fsl.txt
index 752a685d926f..dbbc0952021c 100644
--- a/Documentation/devicetree/bindings/arm/fsl.txt
+++ b/Documentation/devicetree/bindings/arm/fsl.txt
@@ -135,6 +135,10 @@ LS1043A ARMv8 based RDB Board
Required root node properties:
- compatible = "fsl,ls1043a-rdb", "fsl,ls1043a";
+LS1043A ARMv8 based QDS Board
+Required root node properties:
+ - compatible = "fsl,ls1043a-qds", "fsl,ls1043a";
+
LS2080A ARMv8 based Simulator model
Required root node properties:
- compatible = "fsl,ls2080a-simu", "fsl,ls2080a";
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt
index e3ccab114006..83fe816ae050 100644
--- a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt
+++ b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt
@@ -1,29 +1,33 @@
Hisilicon Platforms Device Tree Bindings
----------------------------------------------------
-Hi6220 SoC
-Required root node properties:
- - compatible = "hisilicon,hi6220";
-
Hi4511 Board
Required root node properties:
- compatible = "hisilicon,hi3620-hi4511";
-HiP04 D01 Board
+Hi6220 SoC
Required root node properties:
- - compatible = "hisilicon,hip04-d01";
+ - compatible = "hisilicon,hi6220";
+
+HiKey Board
+Required root node properties:
+ - compatible = "hisilicon,hi6220-hikey", "hisilicon,hi6220";
HiP01 ca9x2 Board
Required root node properties:
- compatible = "hisilicon,hip01-ca9x2";
-HiKey Board
+HiP04 D01 Board
Required root node properties:
- - compatible = "hisilicon,hi6220-hikey", "hisilicon,hi6220";
+ - compatible = "hisilicon,hip04-d01";
HiP05 D02 Board
Required root node properties:
- compatible = "hisilicon,hip05-d02";
+HiP06 D03 Board
+Required root node properties:
+ - compatible = "hisilicon,hip06-d03";
+
Hisilicon system controller
Required properties:
diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
index 21e71a5e866e..94b57f247615 100644
--- a/Documentation/devicetree/bindings/arm/omap/omap.txt
+++ b/Documentation/devicetree/bindings/arm/omap/omap.txt
@@ -133,6 +133,9 @@ Boards:
- AM335X Bone : Low cost community board
compatible = "ti,am335x-bone", "ti,am33xx", "ti,omap3"
+- AM3359 ICEv2 : Low cost Industrial Communication Engine EVM.
+ compatible = "ti,am3359-icev2", "ti,am33xx", "ti,omap3"
+
- AM335X OrionLXm : Substation Automation Platform
compatible = "novatech,am335x-lxm", "ti,am33xx"
@@ -169,6 +172,9 @@ Boards:
- AM57XX SBC-AM57x
compatible = "compulab,sbc-am57x", "compulab,cl-som-am57x", "ti,am5728", "ti,dra742", "ti,dra74", "ti,dra7"
+- AM5728 IDK
+ compatible = "ti,am5728-idk", "ti,am5728", "ti,dra742", "ti,dra74", "ti,dra7"
+
- DRA742 EVM: Software Development Board for DRA742
compatible = "ti,dra7-evm", "ti,dra742", "ti,dra74", "ti,dra7"
diff --git a/Documentation/devicetree/bindings/arm/oxnas.txt b/Documentation/devicetree/bindings/arm/oxnas.txt
new file mode 100644
index 000000000000..b9e49711ba05
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/oxnas.txt
@@ -0,0 +1,9 @@
+Oxford Semiconductor OXNAS SoCs Family device tree bindings
+-------------------------------------------
+
+Boards with the OX810SE SoC shall have the following properties:
+ Required root node property:
+ compatible: "oxsemi,ox810se"
+
+Board compatible values:
+ - "wd,mbwe" (OX810SE)
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 6eb73be9433e..74d5417d0410 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -22,10 +22,11 @@ Required properties:
"arm,arm11mpcore-pmu"
"arm,arm1176-pmu"
"arm,arm1136-pmu"
+ "brcm,vulcan-pmu"
+ "cavium,thunder-pmu"
"qcom,scorpion-pmu"
"qcom,scorpion-mp-pmu"
"qcom,krait-pmu"
- "cavium,thunder-pmu"
- interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu
interrupt (PPI) then 1 interrupt should be specified.
diff --git a/Documentation/devicetree/bindings/arm/rockchip.txt b/Documentation/devicetree/bindings/arm/rockchip.txt
index 078c14fcdaaa..715d960d5eea 100644
--- a/Documentation/devicetree/bindings/arm/rockchip.txt
+++ b/Documentation/devicetree/bindings/arm/rockchip.txt
@@ -39,6 +39,10 @@ Rockchip platforms device tree bindings
Required root node properties:
- compatible = "netxeon,r89", "rockchip,rk3288";
+- GeekBuying GeekBox:
+ Required root node properties:
+ - compatible = "geekbuying,geekbox", "rockchip,rk3368";
+
- Google Brain (dev-board):
Required root node properties:
- compatible = "google,veyron-brain-rev0", "google,veyron-brain",
@@ -87,6 +91,10 @@ Rockchip platforms device tree bindings
"google,veyron-speedy-rev3", "google,veyron-speedy-rev2",
"google,veyron-speedy", "google,veyron", "rockchip,rk3288";
+- mqmaker MiQi:
+ Required root node properties:
+ - compatible = "mqmaker,miqi", "rockchip,rk3288";
+
- Rockchip RK3368 evb:
Required root node properties:
- compatible = "rockchip,rk3368-evb-act8846", "rockchip,rk3368";
@@ -97,4 +105,8 @@ Rockchip platforms device tree bindings
- Rockchip RK3228 Evaluation board:
Required root node properties:
- - compatible = "rockchip,rk3228-evb", "rockchip,rk3228";
+ - compatible = "rockchip,rk3228-evb", "rockchip,rk3228";
+
+- Rockchip RK3399 evb:
+ Required root node properties:
+ - compatible = "rockchip,rk3399-evb", "rockchip,rk3399";
diff --git a/Documentation/devicetree/bindings/arm/samsung/samsung-boards.txt b/Documentation/devicetree/bindings/arm/samsung/samsung-boards.txt
index 12129c011c8f..f5deace2b380 100644
--- a/Documentation/devicetree/bindings/arm/samsung/samsung-boards.txt
+++ b/Documentation/devicetree/bindings/arm/samsung/samsung-boards.txt
@@ -2,6 +2,8 @@
Required root node properties:
- compatible = should be one or more of the following.
+ - "samsung,artik5" - for Exynos3250-based Samsung ARTIK5 module.
+ - "samsung,artik5-eval" - for Exynos3250-based Samsung ARTIK5 eval board.
- "samsung,monk" - for Exynos3250-based Samsung Simband board.
- "samsung,rinato" - for Exynos3250-based Samsung Gear2 board.
- "samsung,smdkv310" - for Exynos4210-based Samsung SMDKV310 eval board.
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt
index 02c27004d4a8..a74b37b07e5c 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt
@@ -1,16 +1,20 @@
NVIDIA Tegra Power Management Controller (PMC)
+== Power Management Controller Node ==
+
The PMC block interacts with an external Power Management Unit. The PMC
mostly controls the entry and exit of the system from different sleep
modes. It provides power-gating controllers for SoC and CPU power-islands.
Required properties:
- name : Should be pmc
-- compatible : For Tegra20, must contain "nvidia,tegra20-pmc". For Tegra30,
- must contain "nvidia,tegra30-pmc". For Tegra114, must contain
- "nvidia,tegra114-pmc". For Tegra124, must contain "nvidia,tegra124-pmc".
- Otherwise, must contain "nvidia,<chip>-pmc", plus at least one of the
- above, where <chip> is tegra132.
+- compatible : Should contain one of the following:
+ For Tegra20 must contain "nvidia,tegra20-pmc".
+ For Tegra30 must contain "nvidia,tegra30-pmc".
+ For Tegra114 must contain "nvidia,tegra114-pmc"
+ For Tegra124 must contain "nvidia,tegra124-pmc"
+ For Tegra132 must contain "nvidia,tegra124-pmc"
+ For Tegra210 must contain "nvidia,tegra210-pmc"
- reg : Offset and length of the register set for the device
- clocks : Must contain an entry for each entry in clock-names.
See ../clocks/clock-bindings.txt for details.
@@ -68,6 +72,11 @@ Optional properties for hardware-triggered thermal reset (inside 'i2c-thermtrip'
Defaults to 0. Valid values are described in section 12.5.2
"Pinmux Support" of the Tegra4 Technical Reference Manual.
+Optional nodes:
+- powergates : This node contains a hierarchy of power domain nodes, which
+ should match the powergates on the Tegra SoC. See "Powergate
+ Nodes" below.
+
Example:
/ SoC dts including file
@@ -113,3 +122,76 @@ pmc@7000f400 {
};
...
};
+
+
+== Powergate Nodes ==
+
+Each of the powergate nodes represents a power-domain on the Tegra SoC
+that can be power-gated by the Tegra PMC. The name of the powergate node
+should be one of the below. Note that not every powergate is applicable
+to all Tegra devices and the following list shows which powergates are
+applicable to which devices. Please refer to the Tegra TRM for more
+details on the various powergates.
+
+ Name Description Devices Applicable
+ 3d 3D Graphics Tegra20/114/124/210
+ 3d0 3D Graphics 0 Tegra30
+ 3d1 3D Graphics 1 Tegra30
+ aud Audio Tegra210
+ dfd Debug Tegra210
+ dis Display A Tegra114/124/210
+ disb Display B Tegra114/124/210
+ heg 2D Graphics Tegra30/114/124/210
+ iram Internal RAM Tegra124/210
+ mpe MPEG Encode All
+ nvdec NVIDIA Video Decode Engine Tegra210
+ nvjpg NVIDIA JPEG Engine Tegra210
+ pcie PCIE Tegra20/30/124/210
+ sata SATA Tegra30/124/210
+ sor Display interfaces Tegra124/210
+ ve2 Video Encode Engine 2 Tegra210
+ venc Video Encode Engine All
+ vdec Video Decode Engine Tegra20/30/114/124
+ vic Video Imaging Compositor Tegra124/210
+ xusba USB Partition A Tegra114/124/210
+ xusbb USB Partition B Tegra114/124/210
+ xusbc USB Partition C Tegra114/124/210
+
+Required properties:
+ - clocks: Must contain an entry for each clock required by the PMC for
+ controlling a power-gate. See ../clocks/clock-bindings.txt for details.
+ - resets: Must contain an entry for each reset required by the PMC for
+ controlling a power-gate. See ../reset/reset.txt for details.
+ - #power-domain-cells: Must be 0.
+
+Example:
+
+ pmc: pmc@7000e400 {
+ compatible = "nvidia,tegra210-pmc";
+ reg = <0x0 0x7000e400 0x0 0x400>;
+ clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>;
+ clock-names = "pclk", "clk32k_in";
+
+ powergates {
+ pd_audio: aud {
+ clocks = <&tegra_car TEGRA210_CLK_APE>,
+ <&tegra_car TEGRA210_CLK_APB2APE>;
+ resets = <&tegra_car 198>;
+ #power-domain-cells = <0>;
+ };
+ };
+ };
+
+
+== Powergate Clients ==
+
+Hardware blocks belonging to a power domain should contain a "power-domains"
+property that is a phandle pointing to the corresponding powergate node.
+
+Example:
+
+ adma: adma@702e2000 {
+ ...
+ power-domains = <&pd_audio>;
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt
index 30df832a6f2f..87adfb227ca9 100644
--- a/Documentation/devicetree/bindings/ata/ahci-platform.txt
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt
@@ -32,6 +32,10 @@ Optional properties:
- target-supply : regulator for SATA target power
- phys : reference to the SATA PHY node
- phy-names : must be "sata-phy"
+- ports-implemented : Mask that indicates which ports that the HBA supports
+ are available for software to use. Useful if PORTS_IMPL
+ is not programmed by the BIOS, which is true with
+ some embedded SoCs.
Required properties when using sub-nodes:
- #address-cells : number of cells to encode an address
diff --git a/Documentation/devicetree/bindings/btmrvl.txt b/Documentation/devicetree/bindings/btmrvl.txt
deleted file mode 100644
index 58f964bb0a52..000000000000
--- a/Documentation/devicetree/bindings/btmrvl.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-btmrvl
-------
-
-Required properties:
-
- - compatible : must be "btmrvl,cfgdata"
-
-Optional properties:
-
- - btmrvl,cal-data : Calibration data downloaded to the device during
- initialization. This is an array of 28 values(u8).
-
- - btmrvl,gpio-gap : gpio and gap (in msecs) combination to be
- configured.
-
-Example:
-
-GPIO pin 13 is configured as a wakeup source and GAP is set to 100 msecs
-in below example.
-
-btmrvl {
- compatible = "btmrvl,cfgdata";
-
- btmrvl,cal-data = /bits/ 8 <
- 0x37 0x01 0x1c 0x00 0xff 0xff 0xff 0xff 0x01 0x7f 0x04 0x02
- 0x00 0x00 0xba 0xce 0xc0 0xc6 0x2d 0x00 0x00 0x00 0x00 0x00
- 0x00 0x00 0xf0 0x00>;
- btmrvl,gpio-gap = <0x0d64>;
-};
diff --git a/Documentation/devicetree/bindings/clock/microchip,pic32.txt b/Documentation/devicetree/bindings/clock/microchip,pic32.txt
new file mode 100644
index 000000000000..c93d88fdd858
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/microchip,pic32.txt
@@ -0,0 +1,39 @@
+Microchip PIC32 Clock Controller Binding
+----------------------------------------
+Microchip clock controller consists of a few oscillators, a PLL, a multiplexer
+and a few divider modules.
+
+This binding uses common clock bindings.
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible: shall be "microchip,pic32mzda-clk".
+- reg: shall contain base address and length of clock registers.
+- #clock-cells: shall be 1.
+
+Optional properties:
+- microchip,pic32mzda-sosc: shall be added only if platform has
+ secondary oscillator connected.
+
+Example:
+ rootclk: clock-controller@1f801200 {
+ compatible = "microchip,pic32mzda-clk";
+ reg = <0x1f801200 0x200>;
+ #clock-cells = <1>;
+ /* optional */
+ microchip,pic32mzda-sosc;
+ };
+
+
+The clock consumer shall specify the desired clock-output of the clock
+controller (as defined in [2]) by specifying output-id in its "clock"
+phandle cell.
+[2] include/dt-bindings/clock/microchip,pic32-clock.h
+
+For example for UART2:
+uart2: serial@2 {
+ compatible = "microchip,pic32mzda-uart";
+ reg = <>;
+ interrupts = <>;
+ clocks = <&rootclk PB2CLK>;
+};
diff --git a/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt b/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt
index e0fc2c11dd00..241fb0545b9e 100644
--- a/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt
+++ b/Documentation/devicetree/bindings/clock/qca,ath79-pll.txt
@@ -3,7 +3,7 @@ Binding for Qualcomm Atheros AR7xxx/AR9XXX PLL controller
The PLL controller provides the 3 main clocks of the SoC: CPU, DDR and AHB.
Required Properties:
-- compatible: has to be "qca,<soctype>-cpu-intc" and one of the following
+- compatible: has to be "qca,<soctype>-pll" and one of the following
fallbacks:
- "qca,ar7100-pll"
- "qca,ar7240-pll"
@@ -21,8 +21,8 @@ Optional properties:
Example:
- memory-controller@18050000 {
- compatible = "qca,ar9132-ppl", "qca,ar9130-pll";
+ pll-controller@18050000 {
+ compatible = "qca,ar9132-pll", "qca,ar9130-pll";
reg = <0x18050000 0x20>;
clock-names = "ref";
diff --git a/Documentation/devicetree/bindings/crypto/fsl-imx-scc.txt b/Documentation/devicetree/bindings/crypto/fsl-imx-scc.txt
new file mode 100644
index 000000000000..7aad448e8a36
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-imx-scc.txt
@@ -0,0 +1,21 @@
+Freescale Security Controller (SCC)
+
+Required properties:
+- compatible : Should be "fsl,imx25-scc".
+- reg : Should contain register location and length.
+- interrupts : Should contain interrupt numbers for SCM IRQ and SMN IRQ.
+- interrupt-names : Should specify the names "scm" and "smn" for the
+ SCM IRQ and SMN IRQ.
+- clocks: Should contain the clock driving the SCC core.
+- clock-names: Should be set to "ipg".
+
+Example:
+
+ scc: crypto@53fac000 {
+ compatible = "fsl,imx25-scc";
+ reg = <0x53fac000 0x4000>;
+ clocks = <&clks 111>;
+ clock-names = "ipg";
+ interrupts = <49>, <50>;
+ interrupt-names = "scm", "smn";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/samsung-sss.txt b/Documentation/devicetree/bindings/crypto/samsung-sss.txt
index a6dafa83c6df..7a5ca56683cc 100644
--- a/Documentation/devicetree/bindings/crypto/samsung-sss.txt
+++ b/Documentation/devicetree/bindings/crypto/samsung-sss.txt
@@ -23,10 +23,8 @@ Required properties:
- "samsung,exynos4210-secss" for Exynos4210, Exynos4212, Exynos4412, Exynos5250,
Exynos5260 and Exynos5420 SoCs.
- reg : Offset and length of the register set for the module
-- interrupts : interrupt specifiers of SSS module interrupts, should contain
- following entries:
- - first : feed control interrupt (required for all variants),
- - second : hash interrupt (required only for samsung,s5pv210-secss).
+- interrupts : interrupt specifiers of SSS module interrupts (one feed
+ control interrupt).
- clocks : list of clock phandle and specifier pairs for all clocks listed in
clock-names property.
diff --git a/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt b/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt
new file mode 100644
index 000000000000..fd459f00aa5a
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/event/exynos-nocp.txt
@@ -0,0 +1,26 @@
+
+* Samsung Exynos NoC (Network on Chip) Probe device
+
+The Samsung Exynos542x SoC has NoC (Network on Chip) Probe for NoC bus.
+NoC provides the primitive values to get the performance data. The packets
+that the Network on Chip (NoC) probes detect are transported over
+the network infrastructure to observer units. You can configure probes to
+capture packets with header or data on the data request response network,
+or as traffic debug or statistic collectors. Exynos542x bus has multiple
+NoC probes to provide bandwidth information about behavior of the SoC
+that you can use while analyzing system performance.
+
+Required properties:
+- compatible: Should be "samsung,exynos5420-nocp"
+- reg: physical base address of each NoC Probe and length of memory mapped region.
+
+Optional properties:
+- clock-names : the name of clock used by the NoC Probe, "nocp"
+- clocks : phandles for clock specified in "clock-names" property
+
+Example : NoC Probe nodes in Device Tree are listed below.
+
+ nocp_mem0_0: nocp@10CA1000 {
+ compatible = "samsung,exynos5420-nocp";
+ reg = <0x10CA1000 0x200>;
+ };
diff --git a/Documentation/devicetree/bindings/devfreq/exynos-bus.txt b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
new file mode 100644
index 000000000000..d3ec8e676b6b
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/exynos-bus.txt
@@ -0,0 +1,409 @@
+* Generic Exynos Bus frequency device
+
+The Samsung Exynos SoC has many buses for data transfer between DRAM
+and sub-blocks in SoC. Most Exynos SoCs share the common architecture
+for buses. Generally, each bus of Exynos SoC includes a source clock
+and a power line, which are able to change the clock frequency
+of the bus in runtime. To monitor the usage of each bus in runtime,
+the driver uses the PPMU (Platform Performance Monitoring Unit), which
+is able to measure the current load of sub-blocks.
+
+The Exynos SoC includes various sub-blocks, each of which has its own AXI bus.
+Each AXI bus has its own source clock but does not necessarily have its own
+power line; a power line might be shared among more than one sub-block.
+So, bus devices can be divided into two types according to the role of each
+sub-block:
+- parent bus device
+- passive bus device
+
+Basically, parent and passive bus devices share the same power line.
+Only the parent bus device can change the voltage of the shared power line,
+and the remaining bus devices (passive bus devices) depend on the decision of
+the parent bus device. If there are three blocks which share the VDD_xxx
+power line, only one block should be the parent device and the remaining blocks
+should depend on the parent device as passive devices.
+
+ VDD_xxx |--- A block (parent)
+ |--- B block (passive)
+ |--- C block (passive)
+
+The composition differs slightly among Exynos SoCs because each Exynos
+SoC has different sub-blocks. Therefore, such differences should be specified
+in the devicetree file instead of in each device driver. As a result, this driver
+is able to support the bus frequency for all Exynos SoCs.
+
+Required properties for all bus devices:
+- compatible: Should be "samsung,exynos-bus".
+- clock-names : the name of clock used by the bus, "bus".
+- clocks : phandles for clock specified in "clock-names" property.
+- operating-points-v2: the OPP table including frequency/voltage information
+ to support DVFS (Dynamic Voltage/Frequency Scaling) feature.
+
+Required properties only for parent bus device:
+- vdd-supply: the regulator to provide the buses with the voltage.
+- devfreq-events: the devfreq-event device to monitor the current utilization
+ of buses.
+
+Required properties only for passive bus device:
+- devfreq: the parent bus device.
+
+Optional properties only for parent bus device:
+- exynos,saturation-ratio: the percentage value which is used to calibrate
+ the performance count against total cycle count.
+- exynos,voltage-tolerance: the percentage value for bus voltage tolerance
+ which is used to calculate the max voltage.
+
+Detailed correlation between sub-blocks and power line according to Exynos SoC:
+- In case of Exynos3250, there are two power lines as follows:
+ VDD_MIF |--- DMC
+
+ VDD_INT |--- LEFTBUS (parent device)
+ |--- PERIL
+ |--- MFC
+ |--- G3D
+ |--- RIGHTBUS
+ |--- PERIR
+ |--- FSYS
+ |--- LCD0
+ |--- PERIR
+ |--- ISP
+ |--- CAM
+
+- In case of Exynos4210, there is one power line as follows:
+ VDD_INT |--- DMC (parent device)
+ |--- LEFTBUS
+ |--- PERIL
+ |--- MFC(L)
+ |--- G3D
+ |--- TV
+ |--- LCD0
+ |--- RIGHTBUS
+ |--- PERIR
+ |--- MFC(R)
+ |--- CAM
+ |--- FSYS
+ |--- GPS
+ |--- LCD0
+ |--- LCD1
+
+- In case of Exynos4x12, there are two power lines as follows:
+ VDD_MIF |--- DMC
+
+ VDD_INT |--- LEFTBUS (parent device)
+ |--- PERIL
+ |--- MFC(L)
+ |--- G3D
+ |--- TV
+ |--- IMAGE
+ |--- RIGHTBUS
+ |--- PERIR
+ |--- MFC(R)
+ |--- CAM
+ |--- FSYS
+ |--- GPS
+ |--- LCD0
+ |--- ISP
+
+- In case of Exynos5422, there are two power lines as follows:
+ VDD_MIF |--- DREX 0 (parent device, DRAM EXpress controller)
+ |--- DREX 1
+
+ VDD_INT |--- NoC_Core (parent device)
+ |--- G2D
+ |--- G3D
+ |--- DISP1
+ |--- NoC_WCORE
+ |--- GSCL
+ |--- MSCL
+ |--- ISP
+ |--- MFC
+ |--- GEN
+ |--- PERIS
+ |--- PERIC
+ |--- FSYS
+ |--- FSYS2
+
+Example1:
+ Show the AXI buses of Exynos3250 SoC. Exynos3250 divides the buses to
+ power line (regulator). The MIF (Memory Interface) AXI bus is used to
+ transfer data between DRAM and CPU and uses the VDD_MIF regulator.
+
+ - MIF (Memory Interface) block
+ : VDD_MIF |--- DMC (Dynamic Memory Controller)
+
+ - INT (Internal) block
+ : VDD_INT |--- LEFTBUS (parent device)
+ |--- PERIL
+ |--- MFC
+ |--- G3D
+ |--- RIGHTBUS
+ |--- FSYS
+ |--- LCD0
+ |--- PERIR
+ |--- ISP
+ |--- CAM
+
+ - MIF bus's frequency/voltage table
+ -----------------------
+ |Lv| Freq | Voltage |
+ -----------------------
+ |L1| 50000 |800000 |
+ |L2| 100000 |800000 |
+ |L3| 134000 |800000 |
+ |L4| 200000 |825000 |
+ |L5| 400000 |875000 |
+ -----------------------
+
+ - INT bus's frequency/voltage table
+ ----------------------------------------------------------
+ |Block|LEFTBUS|RIGHTBUS|MCUISP |ISP |PERIL ||VDD_INT |
+ | name| |LCD0 | | | || |
+ | | |FSYS | | | || |
+ | | |MFC | | | || |
+ ----------------------------------------------------------
+ |Mode |*parent|passive |passive|passive|passive|| |
+ ----------------------------------------------------------
+ |Lv |Frequency ||Voltage |
+ ----------------------------------------------------------
+ |L1 |50000 |50000 |50000 |50000 |50000 ||900000 |
+ |L2 |80000 |80000 |80000 |80000 |80000 ||900000 |
+ |L3 |100000 |100000 |100000 |100000 |100000 ||1000000 |
+ |L4 |134000 |134000 |200000 |200000 | ||1000000 |
+ |L5 |200000 |200000 |400000 |300000 | ||1000000 |
+ ----------------------------------------------------------
+
+Example2 :
+ The bus of DMC (Dynamic Memory Controller) block in exynos3250.dtsi
+ is listed below:
+
+ bus_dmc: bus_dmc {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu_dmc CLK_DIV_DMC>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_dmc_opp_table>;
+ status = "disabled";
+ };
+
+ bus_dmc_opp_table: opp_table1 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp@50000000 {
+ opp-hz = /bits/ 64 <50000000>;
+ opp-microvolt = <800000>;
+ };
+ opp@100000000 {
+ opp-hz = /bits/ 64 <100000000>;
+ opp-microvolt = <800000>;
+ };
+ opp@134000000 {
+ opp-hz = /bits/ 64 <134000000>;
+ opp-microvolt = <800000>;
+ };
+ opp@200000000 {
+ opp-hz = /bits/ 64 <200000000>;
+ opp-microvolt = <825000>;
+ };
+ opp@400000000 {
+ opp-hz = /bits/ 64 <400000000>;
+ opp-microvolt = <875000>;
+ };
+ };
+
+ bus_leftbus: bus_leftbus {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_GDL>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_leftbus_opp_table>;
+ status = "disabled";
+ };
+
+ bus_rightbus: bus_rightbus {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_GDR>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_leftbus_opp_table>;
+ status = "disabled";
+ };
+
+ bus_lcd0: bus_lcd0 {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_ACLK_160>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_leftbus_opp_table>;
+ status = "disabled";
+ };
+
+ bus_fsys: bus_fsys {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_ACLK_200>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_leftbus_opp_table>;
+ status = "disabled";
+ };
+
+ bus_mcuisp: bus_mcuisp {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_ACLK_400_MCUISP>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_mcuisp_opp_table>;
+ status = "disabled";
+ };
+
+ bus_isp: bus_isp {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_ACLK_266>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_isp_opp_table>;
+ status = "disabled";
+ };
+
+ bus_peril: bus_peril {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_DIV_ACLK_100>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_peril_opp_table>;
+ status = "disabled";
+ };
+
+ bus_mfc: bus_mfc {
+ compatible = "samsung,exynos-bus";
+ clocks = <&cmu CLK_SCLK_MFC>;
+ clock-names = "bus";
+ operating-points-v2 = <&bus_leftbus_opp_table>;
+ status = "disabled";
+ };
+
+ bus_leftbus_opp_table: opp_table1 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp@50000000 {
+ opp-hz = /bits/ 64 <50000000>;
+ opp-microvolt = <900000>;
+ };
+ opp@80000000 {
+ opp-hz = /bits/ 64 <80000000>;
+ opp-microvolt = <900000>;
+ };
+ opp@100000000 {
+ opp-hz = /bits/ 64 <100000000>;
+ opp-microvolt = <1000000>;
+ };
+ opp@134000000 {
+ opp-hz = /bits/ 64 <134000000>;
+ opp-microvolt = <1000000>;
+ };
+ opp@200000000 {
+ opp-hz = /bits/ 64 <200000000>;
+ opp-microvolt = <1000000>;
+ };
+ };
+
+ bus_mcuisp_opp_table: opp_table2 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp@50000000 {
+ opp-hz = /bits/ 64 <50000000>;
+ };
+ opp@80000000 {
+ opp-hz = /bits/ 64 <80000000>;
+ };
+ opp@100000000 {
+ opp-hz = /bits/ 64 <100000000>;
+ };
+ opp@200000000 {
+ opp-hz = /bits/ 64 <200000000>;
+ };
+ opp@400000000 {
+ opp-hz = /bits/ 64 <400000000>;
+ };
+ };
+
+ bus_isp_opp_table: opp_table3 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp@50000000 {
+ opp-hz = /bits/ 64 <50000000>;
+ };
+ opp@80000000 {
+ opp-hz = /bits/ 64 <80000000>;
+ };
+ opp@100000000 {
+ opp-hz = /bits/ 64 <100000000>;
+ };
+ opp@200000000 {
+ opp-hz = /bits/ 64 <200000000>;
+ };
+ opp@300000000 {
+ opp-hz = /bits/ 64 <300000000>;
+ };
+ };
+
+ bus_peril_opp_table: opp_table4 {
+ compatible = "operating-points-v2";
+ opp-shared;
+
+ opp@50000000 {
+ opp-hz = /bits/ 64 <50000000>;
+ };
+ opp@80000000 {
+ opp-hz = /bits/ 64 <80000000>;
+ };
+ opp@100000000 {
+ opp-hz = /bits/ 64 <100000000>;
+ };
+ };
+
+
+ Usage case to handle the frequency and voltage of bus on runtime
+ in exynos3250-rinato.dts is listed below:
+
+ &bus_dmc {
+ devfreq-events = <&ppmu_dmc0_3>, <&ppmu_dmc1_3>;
+ vdd-supply = <&buck1_reg>; /* VDD_MIF */
+ status = "okay";
+ };
+
+ &bus_leftbus {
+ devfreq-events = <&ppmu_leftbus_3>, <&ppmu_rightbus_3>;
+ vdd-supply = <&buck3_reg>;
+ status = "okay";
+ };
+
+ &bus_rightbus {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
+
+ &bus_lcd0 {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
+
+ &bus_fsys {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
+
+ &bus_mcuisp {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
+
+ &bus_isp {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
+
+ &bus_peril {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
+
+ &bus_mfc {
+ devfreq = <&bus_leftbus>;
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt b/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt
index 1396078d15ac..baf9b34d20bf 100644
--- a/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt
+++ b/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt
@@ -12,6 +12,10 @@ Required properties:
- reg: Should contain DMA registers location and length.
- interrupts: Should contain the DMA interrupts associated
to the DMA channels in ascending order.
+- interrupt-names: Should contain the names of the interrupt
+ in the form "dmaXX".
+ Use "dma-shared-all" for the common interrupt line
+ that is shared by all dma channels.
- #dma-cells: Must be <1>, the cell in the dmas property of the
client device represents the DREQ number.
- brcm,dma-channel-mask: Bit mask representing the channels
@@ -34,13 +38,35 @@ dma: dma@7e007000 {
<1 24>,
<1 25>,
<1 26>,
+ /* dma channel 11-14 share one irq */
<1 27>,
+ <1 27>,
+ <1 27>,
+ <1 27>,
+ /* unused shared irq for all channels */
<1 28>;
+ interrupt-names = "dma0",
+ "dma1",
+ "dma2",
+ "dma3",
+ "dma4",
+ "dma5",
+ "dma6",
+ "dma7",
+ "dma8",
+ "dma9",
+ "dma10",
+ "dma11",
+ "dma12",
+ "dma13",
+ "dma14",
+ "dma-shared-all";
#dma-cells = <1>;
brcm,dma-channel-mask = <0x7f35>;
};
+
DMA clients connected to the BCM2835 DMA controller must use the format
described in the dma.txt file, using a two-cell specifier for each channel.
diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
index dc8d3aac1aa9..175f0e44ed85 100644
--- a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
+++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
@@ -58,6 +58,15 @@ The third cell specifies the transfer priority as below.
1 Medium
2 Low
+Optional properties:
+
+- gpr : The phandle to the General Purpose Register (GPR) node.
+- fsl,sdma-event-remap : Register bits of sdma event remap, the format is
+ <reg shift val>.
+ reg is the GPR register offset.
+ shift is the bit position inside the GPR register.
+ val is the value of the bit (0 or 1).
+
Examples:
sdma@83fb0000 {
@@ -83,3 +92,21 @@ ssi2: ssi@70014000 {
dma-names = "rx", "tx";
fsl,fifo-depth = <15>;
};
+
+Using the fsl,sdma-event-remap property:
+
+If we want to use SDMA on the SAI1 port on a MX6SX:
+
+&sdma {
+ gpr = <&gpr>;
+ /* SDMA events remap for SAI1_RX and SAI1_TX */
+ fsl,sdma-event-remap = <0 15 1>, <0 16 1>;
+};
+
+The fsl,sdma-event-remap property in this case has two values:
+- <0 15 1> means that the offset is 0, so GPR0 is the register of the
+SDMA remap. Bit 15 of GPR0 selects between UART4_RX and SAI1_RX.
+Setting bit 15 to 1 selects SAI1_RX.
+- <0 16 1> means that the offset is 0, so GPR0 is the register of the
+SDMA remap. Bit 16 of GPR0 selects between UART4_TX and SAI1_TX.
+Setting bit 16 to 1 selects SAI1_TX.
diff --git a/Documentation/devicetree/bindings/dma/mv-xor.txt b/Documentation/devicetree/bindings/dma/mv-xor.txt
index 276ef815ef32..c075f5988135 100644
--- a/Documentation/devicetree/bindings/dma/mv-xor.txt
+++ b/Documentation/devicetree/bindings/dma/mv-xor.txt
@@ -1,7 +1,10 @@
* Marvell XOR engines
Required properties:
-- compatible: Should be "marvell,orion-xor" or "marvell,armada-380-xor"
+- compatible: Should be one of the following:
+ - "marvell,orion-xor"
+ - "marvell,armada-380-xor"
+ - "marvell,armada-3700-xor".
- reg: Should contain registers location and length (two sets)
the first set is the low registers, the second set the high
registers for the XOR engine.
diff --git a/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.txt b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.txt
new file mode 100644
index 000000000000..1e1dc8f972e4
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/nvidia,tegra210-adma.txt
@@ -0,0 +1,55 @@
+* NVIDIA Tegra Audio DMA (ADMA) controller
+
+The Tegra Audio DMA controller is used for transferring data
+between system memory and the Audio Processing Engine (APE).
+
+Required properties:
+- compatible: Must be "nvidia,tegra210-adma".
+- reg: Should contain DMA registers location and length. This should be
+ a single entry that includes all of the per-channel registers in one
+ contiguous bank.
+- interrupt-parent: Phandle to the interrupt parent controller.
+- interrupts: Should contain all of the per-channel DMA interrupts in
+ ascending order with respect to the DMA channel index.
+- clocks: Must contain one entry for the ADMA module clock
+ (TEGRA210_CLK_D_AUDIO).
+- clock-names: Must contain the name "d_audio" for the corresponding
+ 'clocks' entry.
+- #dma-cells : Must be 1. The first cell denotes the receive/transmit
+ request number and should be between 1 and the maximum number of
+ requests supported. This value corresponds to the RX/TX_REQUEST_SELECT
+ fields in the ADMA_CHn_CTRL register.
+
+
+Example:
+
+adma: dma@702e2000 {
+ compatible = "nvidia,tegra210-adma";
+ reg = <0x0 0x702e2000 0x0 0x2000>;
+ interrupt-parent = <&tegra_agic>;
+ interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 43 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_D_AUDIO>;
+ clock-names = "d_audio";
+ #dma-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
index 1c9d48ea4914..9cbf5d9df8fd 100644
--- a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
+++ b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
@@ -13,6 +13,8 @@ Required properties:
- clock-names: must contain "bam_clk" entry
- qcom,ee : indicates the active Execution Environment identifier (0-7) used in
the secure world.
+- qcom,controlled-remotely : optional, indicates that the bam is controlled by
+ remote processor i.e. execution environment.
Example:
diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt
index c261598164a7..0f5583293c9c 100644
--- a/Documentation/devicetree/bindings/dma/snps-dma.txt
+++ b/Documentation/devicetree/bindings/dma/snps-dma.txt
@@ -13,6 +13,11 @@ Required properties:
- chan_priority: priority of channels. 0 (default): increase from chan 0->n, 1:
increase from chan n->0
- block_size: Maximum block size supported by the controller
+- data-width: Maximum data width supported by hardware per AHB master
+ (in bytes, power of 2)
+
+
+Deprecated properties:
- data_width: Maximum data width supported by hardware per AHB master
(0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
@@ -38,7 +43,7 @@ Example:
chan_allocation_order = <1>;
chan_priority = <1>;
block_size = <0xfff>;
- data_width = <3 3>;
+ data-width = <8 8>;
};
DMA clients connected to the Designware DMA controller must use the format
@@ -47,8 +52,8 @@ The four cells in order are:
1. A phandle pointing to the DMA controller
2. The DMA request line number
-3. Source master for transfers on allocated channel
-4. Destination master for transfers on allocated channel
+3. Memory master for transfers on allocated channel
+4. Peripheral master for transfers on allocated channel
Example:
diff --git a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
index e4c4d47f8137..a1f2683c49bf 100644
--- a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
+++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
@@ -3,18 +3,44 @@ It can be configured to have one channel or two channels. If configured
as two channels, one is to transmit to the video device and another is
to receive from the video device.
+Xilinx AXI DMA engine, it does transfers between memory and AXI4 stream
+target devices. It can be configured to have one channel or two channels.
+If configured as two channels, one is to transmit to the device and another
+is to receive from the device.
+
+Xilinx AXI CDMA engine, it does transfers between memory-mapped source
+address and a memory-mapped destination address.
+
Required properties:
-- compatible: Should be "xlnx,axi-vdma-1.00.a"
+- compatible: Should be "xlnx,axi-vdma-1.00.a" or "xlnx,axi-dma-1.00.a" or
+ "xlnx,axi-cdma-1.00.a"
- #dma-cells: Should be <1>, see "dmas" property below
- reg: Should contain VDMA registers location and length.
-- xlnx,num-fstores: Should be the number of framebuffers as configured in h/w.
+- xlnx,addrwidth: Should be the vdma addressing size in bits(ex: 32 bits).
+- dma-ranges: Should be as the following <dma_addr cpu_addr max_len>.
- dma-channel child node: Should have at least one channel and can have up to
two channels per device. This node specifies the properties of each
DMA channel (see child node properties below).
+- clocks: Input clock specifier. Refer to common clock bindings.
+- clock-names: List of input clocks
+ For VDMA:
+ Required elements: "s_axi_lite_aclk"
+ Optional elements: "m_axi_mm2s_aclk", "m_axi_s2mm_aclk",
+ "m_axis_mm2s_aclk", "s_axis_s2mm_aclk"
+ For CDMA:
+ Required elements: "s_axi_lite_aclk", "m_axi_aclk"
+ For AXIDMA:
+ Required elements: "s_axi_lite_aclk"
+ Optional elements: "m_axi_mm2s_aclk", "m_axi_s2mm_aclk",
+ "m_axi_sg_aclk"
+
+Required properties for VDMA:
+- xlnx,num-fstores: Should be the number of framebuffers as configured in h/w.
Optional properties:
- xlnx,include-sg: Tells configured for Scatter-mode in
the hardware.
+Optional properties for VDMA:
- xlnx,flush-fsync: Tells which channel to Flush on Frame sync.
It takes following values:
{1}, flush both channels
@@ -31,6 +57,7 @@ Required child node properties:
Optional child node properties:
- xlnx,include-dre: Tells hardware is configured for Data
Realignment Engine.
+Optional child node properties for VDMA:
- xlnx,genlock-mode: Tells Genlock synchronization is
enabled/disabled in hardware.
@@ -41,8 +68,13 @@ axi_vdma_0: axivdma@40030000 {
compatible = "xlnx,axi-vdma-1.00.a";
#dma_cells = <1>;
reg = < 0x40030000 0x10000 >;
+ dma-ranges = <0x00000000 0x00000000 0x40000000>;
xlnx,num-fstores = <0x8>;
xlnx,flush-fsync = <0x1>;
+ xlnx,addrwidth = <0x20>;
+ clocks = <&clk 0>, <&clk 1>, <&clk 2>, <&clk 3>, <&clk 4>;
+ clock-names = "s_axi_lite_aclk", "m_axi_mm2s_aclk", "m_axi_s2mm_aclk",
+ "m_axis_mm2s_aclk", "s_axis_s2mm_aclk";
dma-channel@40030000 {
compatible = "xlnx,axi-vdma-mm2s-channel";
interrupts = < 0 54 4 >;
diff --git a/Documentation/devicetree/bindings/gpio/gpio-74x164.txt b/Documentation/devicetree/bindings/gpio/gpio-74x164.txt
index cc2608021f26..ce1b2231bf5d 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-74x164.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-74x164.txt
@@ -1,7 +1,9 @@
* Generic 8-bits shift register GPIO driver
Required properties:
-- compatible : Should be "fairchild,74hc595"
+- compatible: Should contain one of the following:
+ "fairchild,74hc595"
+ "nxp,74lvc594"
- reg : chip select number
- gpio-controller : Marks the device node as a gpio controller.
- #gpio-cells : Should be two. The first cell is the pin number and
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt b/Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt
index 120bc4971cf3..4b6cc632ca5c 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt
@@ -1,9 +1,10 @@
-* Freescale MPC512x/MPC8xxx/Layerscape GPIO controller
+* Freescale MPC512x/MPC8xxx/QorIQ/Layerscape GPIO controller
Required properties:
- compatible : Should be "fsl,<soc>-gpio"
The following <soc>s are known to be supported:
- mpc5121, mpc5125, mpc8349, mpc8572, mpc8610, pq3, qoriq.
+ mpc5121, mpc5125, mpc8349, mpc8572, mpc8610, pq3, qoriq,
+ ls1021a, ls1043a, ls2080a.
- reg : Address and length of the register set for the device
- interrupts : Should be the port interrupt shared by all 32 pins.
- #gpio-cells : Should be two. The first cell is the pin number and
@@ -15,7 +16,7 @@ Optional properties:
- little-endian : GPIO registers are used as little endian. If not
present registers are used as big endian by default.
-Example:
+Example of gpio-controller node for a mpc5125 SoC:
gpio0: gpio@1100 {
compatible = "fsl,mpc5125-gpio";
@@ -24,3 +25,16 @@ gpio0: gpio@1100 {
interrupts = <78 0x8>;
status = "okay";
};
+
+Example of gpio-controller node for a ls2080a SoC:
+
+gpio0: gpio@2300000 {
+ compatible = "fsl,ls2080a-gpio", "fsl,qoriq-gpio";
+ reg = <0x0 0x2300000 0x0 0x10000>;
+ interrupts = <0 36 0x4>; /* Level high type */
+ gpio-controller;
+ little-endian;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-xlp.txt b/Documentation/devicetree/bindings/gpio/gpio-xlp.txt
index 262ee4ddf2cb..28662d83a43e 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-xlp.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-xlp.txt
@@ -3,6 +3,8 @@ Netlogic XLP Family GPIO
This GPIO driver is used for following Netlogic XLP SoCs:
XLP832, XLP316, XLP208, XLP980, XLP532
+This GPIO driver is also compatible with GPIO controller found on
+Broadcom Vulcan ARM64.
Required properties:
-------------------
@@ -13,6 +15,7 @@ Required properties:
- "netlogic,xlp208-gpio": For Netlogic XLP208
- "netlogic,xlp980-gpio": For Netlogic XLP980
- "netlogic,xlp532-gpio": For Netlogic XLP532
+ - "brcm,vulcan-gpio": For Broadcom Vulcan ARM64
- reg: Physical base address and length of the controller's registers.
- #gpio-cells: Should be two. The first cell is the pin number and the second
cell is used to specify optional parameters (currently unused).
diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt
index 069cdf6f9dac..68d28f62a6f4 100644
--- a/Documentation/devicetree/bindings/gpio/gpio.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio.txt
@@ -131,6 +131,13 @@ Every GPIO controller node must contain both an empty "gpio-controller"
property, and a #gpio-cells integer property, which indicates the number of
cells in a gpio-specifier.
+Some system-on-chips (SoCs) use the concept of GPIO banks. A GPIO bank is an
+instance of a hardware IP core on a silicon die, usually exposed to the
+programmer as a coherent range of I/O addresses. Usually each such bank is
+exposed in the device tree as an individual gpio-controller node, reflecting
+the fact that the hardware was synthesized by reusing the same IP block a
+few times over.
+
Optionally, a GPIO controller may have a "ngpios" property. This property
indicates the number of in-use slots of available slots for GPIOs. The
typical example is something like this: the hardware register is 32 bits
@@ -145,6 +152,21 @@ additional bitmask is needed to specify which GPIOs are actually in use,
and which are dummies. The bindings for this case has not yet been
specified, but should be specified if/when such hardware appears.
+Optionally, a GPIO controller may have a "gpio-line-names" property. This is
+an array of strings defining the names of the GPIO lines going out of the
+GPIO controller. This name should be the most meaningful producer name
+for the system, such as a rail name indicating the usage. Package names
+such as pin name are discouraged: such lines have opaque names (since they
+are by definition general purpose) and such names are usually not very
+helpful. For example "MMC-CD", "Red LED Vdd" and "ethernet reset" are
+reasonable line names as they describe what the line is used for. "GPIO0"
+is not a good name to give to a GPIO line. Placeholders are discouraged:
+rather use the "" (blank string) if the use of the GPIO line is undefined
+in your design. The names are assigned starting from line offset 0 from
+left to right from the passed array. An incomplete array (where the number
+of passed names is less than ngpios) will still be used up until the last
+provided valid line index.
+
Example:
gpio-controller@00000000 {
@@ -153,6 +175,10 @@ gpio-controller@00000000 {
gpio-controller;
#gpio-cells = <2>;
ngpios = <18>;
+ gpio-line-names = "MMC-CD", "MMC-WP", "VDD eth", "RST eth", "LED R",
+ "LED G", "LED B", "Col A", "Col B", "Col C", "Col D",
+ "Row A", "Row B", "Row C", "Row D", "NMI button",
+ "poweroff", "reset";
}
The GPIO chip may contain GPIO hog definitions. GPIO hogging is a mechanism
diff --git a/Documentation/devicetree/bindings/gpio/nvidia,tegra186-gpio.txt b/Documentation/devicetree/bindings/gpio/nvidia,tegra186-gpio.txt
new file mode 100644
index 000000000000..c82a2e221bc1
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/nvidia,tegra186-gpio.txt
@@ -0,0 +1,161 @@
+NVIDIA Tegra186 GPIO controllers
+
+Tegra186 contains two GPIO controllers; a main controller and an "AON"
+controller. This binding document applies to both controllers. The register
+layouts for the controllers share many similarities, but also some significant
+differences. Hence, this document describes closely related but different
+bindings and compatible values.
+
+The Tegra186 GPIO controller allows software to set the IO direction of, and
+read/write the value of, numerous GPIO signals. Routing of GPIO signals to
+package balls is under the control of a separate pin controller HW block. Two
+major sets of registers exist:
+
+a) Security registers, which allow configuration of allowed access to the GPIO
+register set. These registers exist in a single contiguous block of physical
+address space. The size of this block, and the security features available,
+varies between the different GPIO controllers.
+
+Access to this set of registers is not necessary in all circumstances. Code
+that wishes to configure access to the GPIO registers needs access to these
+registers to do so. Code which simply wishes to read or write GPIO data does not
+need access to these registers.
+
+b) GPIO registers, which allow manipulation of the GPIO signals. In some GPIO
+controllers, these registers are exposed via multiple "physical aliases" in
+address space, each of which accesses the same underlying state. See the hardware
+documentation for rationale. Any particular GPIO client is expected to access
+just one of these physical aliases.
+
+Tegra HW documentation describes a unified naming convention for all GPIOs
+implemented by the SoC. Each GPIO is assigned to a port, and a port may control
+a number of GPIOs. Thus, each GPIO is named according to an alphabetical port
+name and an integer GPIO name within the port. For example, GPIO_PA0, GPIO_PN6,
+or GPIO_PCC3.
+
+The number of ports implemented by each GPIO controller varies. The number of
+implemented GPIOs within each port varies. GPIO registers within a controller
+are grouped and laid out according to the port they affect.
+
+The mapping from port name to the GPIO controller that implements that port, and
+the mapping from port name to register offset within a controller, are both
+extremely non-linear. The header file <dt-bindings/gpio/tegra186-gpio.h>
+describes the port-level mapping. In that file, the naming convention for ports
+matches the HW documentation. The values chosen for the names are alphabetically
+sorted within a particular controller. Drivers need to map between the DT GPIO
+IDs and HW register offsets using a lookup table.
+
+Each GPIO controller can generate a number of interrupt signals. Each signal
+represents the aggregate status for all GPIOs within a set of ports. Thus, the
+number of interrupt signals generated by a controller varies as a rough function
+of the number of ports it implements. Note that the HW documentation refers to
+both the overall controller HW module and the sets-of-ports as "controllers".
+
+Each GPIO controller in fact generates multiple interrupt signals for each set
+of ports. Each GPIO may be configured to feed into a specific one of the
+interrupt signals generated by a set-of-ports. The intent is for each generated
+signal to be routed to a different CPU, thus allowing different CPUs to each
+handle subsets of the interrupts within a port. The status of each of these
+per-port-set signals is reported via a separate register. Thus, a driver needs
+to know which status register to observe. This binding currently defines no
+configuration mechanism for this. By default, drivers should use register
+GPIO_${port}_INTERRUPT_STATUS_G1_0. Future revisions to the binding could
+define a property to configure this.
+
+Required properties:
+- compatible
+ Array of strings.
+ One of:
+ - "nvidia,tegra186-gpio".
+ - "nvidia,tegra186-gpio-aon".
+- reg-names
+ Array of strings.
+ Contains a list of names for the register spaces described by the reg
+ property. May contain the following entries, in any order:
+ - "gpio": Mandatory. GPIO control registers. This may cover either:
+ a) The single physical alias that this OS should use.
+ b) All physical aliases that exist in the controller. This is
+ appropriate when the OS is responsible for managing assignment of
+ the physical aliases.
+ - "security": Optional. Security configuration registers.
+ Users of this binding MUST look up entries in the reg property by name,
+ using this reg-names property to do so.
+- reg
+ Array of (physical base address, length) tuples.
+ Must contain one entry per entry in the reg-names property, in a matching
+ order.
+- interrupts
+ Array of interrupt specifiers.
+ The interrupt outputs from the HW block, one per set of ports, in the
+ order the HW manual describes them. The number of entries required varies
+ depending on compatible value:
+ - "nvidia,tegra186-gpio": 6 entries.
+ - "nvidia,tegra186-gpio-aon": 1 entry.
+- gpio-controller
+ Boolean.
+ Marks the device node as a GPIO controller/provider.
+- #gpio-cells
+ Single-cell integer.
+ Must be <2>.
+ Indicates how many cells are used in a consumer's GPIO specifier.
+ In the specifier:
+ - The first cell is the pin number.
+ See <dt-bindings/gpio/tegra186-gpio.h>.
+ - The second cell contains flags:
+ - Bit 0 specifies polarity
+ - 0: Active-high (normal).
+ - 1: Active-low (inverted).
+- interrupt-controller
+ Boolean.
+ Marks the device node as an interrupt controller/provider.
+- #interrupt-cells
+ Single-cell integer.
+ Must be <2>.
+ Indicates how many cells are used in a consumer's interrupt specifier.
+ In the specifier:
+ - The first cell is the GPIO number.
+ See <dt-bindings/gpio/tegra186-gpio.h>.
+ - The second cell contains flags:
+ - Bits [3:0] indicate trigger type and level:
+ - 1: Low-to-high edge triggered.
+ - 2: High-to-low edge triggered.
+ - 4: Active high level-sensitive.
+ - 8: Active low level-sensitive.
+ Valid combinations are 1, 2, 3, 4, 8.
+
+Example:
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+gpio@2200000 {
+ compatible = "nvidia,tegra186-gpio";
+ reg-names = "security", "gpio";
+ reg =
+ <0x0 0x2200000 0x0 0x10000>,
+ <0x0 0x2210000 0x0 0x10000>;
+ interrupts =
+ <0 47 IRQ_TYPE_LEVEL_HIGH>,
+ <0 50 IRQ_TYPE_LEVEL_HIGH>,
+ <0 53 IRQ_TYPE_LEVEL_HIGH>,
+ <0 56 IRQ_TYPE_LEVEL_HIGH>,
+ <0 59 IRQ_TYPE_LEVEL_HIGH>,
+ <0 180 IRQ_TYPE_LEVEL_HIGH>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+};
+
+gpio@c2f0000 {
+ compatible = "nvidia,tegra186-gpio-aon";
+ reg-names = "security", "gpio";
+ reg =
+ <0x0 0xc2f0000 0x0 0x1000>,
+ <0x0 0xc2f1000 0x0 0x1000>;
+ interrupts =
+ <0 60 IRQ_TYPE_LEVEL_HIGH>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/gpio/wd,mbl-gpio.txt b/Documentation/devicetree/bindings/gpio/wd,mbl-gpio.txt
new file mode 100644
index 000000000000..038c3a6a1f4d
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/wd,mbl-gpio.txt
@@ -0,0 +1,38 @@
+Bindings for the Western Digital's MyBook Live memory-mapped GPIO controllers.
+
+The Western Digital MyBook Live has two memory-mapped GPIO controllers.
+Both GPIO controllers have only a single 8-bit data register, where GPIO
+state can be read and/or written.
+
+Required properties:
+ - compatible: should be "wd,mbl-gpio"
+ - reg-names: must contain
+ "dat" - data register
+ - reg: address + size pairs describing the GPIO register sets;
+ order must correspond with the order of entries in reg-names
+ - #gpio-cells: must be set to 2. The first cell is the pin number and
+ the second cell is used to specify the gpio polarity:
+ 0 = active high
+ 1 = active low
+ - gpio-controller: Marks the device node as a gpio controller.
+
+Optional properties:
+ - no-output: GPIOs are read-only.
+
+Examples:
+ gpio0: gpio0@e0000000 {
+ compatible = "wd,mbl-gpio";
+ reg-names = "dat";
+ reg = <0xe0000000 0x1>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ };
+
+ gpio1: gpio1@e0100000 {
+ compatible = "wd,mbl-gpio";
+ reg-names = "dat";
+ reg = <0xe0100000 0x1>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ no-output;
+ };
diff --git a/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt b/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt
index 23bfe8e1f7cc..ff3db65e50de 100644
--- a/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt
+++ b/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt
@@ -1,9 +1,10 @@
-NVIDIA GK20A Graphics Processing Unit
+NVIDIA Tegra Graphics Processing Units
Required properties:
-- compatible: "nvidia,<chip>-<gpu>"
+- compatible: "nvidia,<gpu>"
Currently recognized values:
- - nvidia,tegra124-gk20a
+ - nvidia,gk20a
+ - nvidia,gm20b
- reg: Physical base address and length of the controller's registers.
Must contain two entries:
- first entry for bar0
@@ -19,14 +20,20 @@ Required properties:
- clock-names: Must include the following entries:
- gpu
- pwr
+If the compatible string is "nvidia,gm20b", then the following clock
+is also required:
+ - ref
- resets: Must contain an entry for each entry in reset-names.
See ../reset/reset.txt for details.
- reset-names: Must include the following entries:
- gpu
-Example:
+Optional properties:
+- iommus: A reference to the IOMMU. See ../iommu/iommu.txt for details.
- gpu@0,57000000 {
+Example for GK20A:
+
+ gpu@57000000 {
compatible = "nvidia,gk20a";
reg = <0x0 0x57000000 0x0 0x01000000>,
<0x0 0x58000000 0x0 0x01000000>;
@@ -39,5 +46,25 @@ Example:
clock-names = "gpu", "pwr";
resets = <&tegra_car 184>;
reset-names = "gpu";
+ iommus = <&mc TEGRA_SWGROUP_GPU>;
+ status = "disabled";
+ };
+
+Example for GM20B:
+
+ gpu@57000000 {
+ compatible = "nvidia,gm20b";
+ reg = <0x0 0x57000000 0x0 0x01000000>,
+ <0x0 0x58000000 0x0 0x01000000>;
+ interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "stall", "nonstall";
+ clocks = <&tegra_car TEGRA210_CLK_GPU>,
+ <&tegra_car TEGRA210_CLK_PLL_P_OUT5>,
+ <&tegra_car TEGRA210_CLK_PLL_G_REF>;
+ clock-names = "gpu", "pwr", "ref";
+ resets = <&tegra_car 184>;
+ reset-names = "gpu";
+ iommus = <&mc TEGRA_SWGROUP_GPU>;
status = "disabled";
};
diff --git a/Documentation/devicetree/bindings/hwmon/ltc2978.txt b/Documentation/devicetree/bindings/hwmon/ltc2978.txt
index a7afbf60bb9c..bf2a47bbdc58 100644
--- a/Documentation/devicetree/bindings/hwmon/ltc2978.txt
+++ b/Documentation/devicetree/bindings/hwmon/ltc2978.txt
@@ -13,6 +13,7 @@ Required properties:
* "lltc,ltc3886"
* "lltc,ltc3887"
* "lltc,ltm2987"
+ * "lltc,ltm4675"
* "lltc,ltm4676"
- reg: I2C slave address
diff --git a/Documentation/devicetree/bindings/i2c/i2c-octeon.txt b/Documentation/devicetree/bindings/i2c/i2c-octeon.txt
index dced82ebe31d..872d485dffab 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-octeon.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-octeon.txt
@@ -4,6 +4,12 @@
Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
+ or
+
+ compatible: "cavium,octeon-7890-twsi"
+
+ Compatibility with cn78XX SOCs.
+
- reg: The base address of the TWSI/I2C bus controller register bank.
- #address-cells: Must be <1>.
diff --git a/Documentation/devicetree/bindings/i2c/i2c-rcar.txt b/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
index cf8bfc956cdc..5f0cb502b1db 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
@@ -19,6 +19,9 @@ Optional properties:
- clock-frequency: desired I2C bus clock frequency in Hz. The absence of this
property indicates the default frequency 100 kHz.
- clocks: clock specifier.
+- dmas: Must contain a list of two references to DMA specifiers, one for
+ transmission, and one for reception.
+- dma-names: Must contain a list of two DMA names, "tx" and "rx".
- i2c-scl-falling-time-ns: see i2c.txt
- i2c-scl-internal-delay-ns: see i2c.txt
diff --git a/Documentation/devicetree/bindings/i2c/i2c-rk3x.txt b/Documentation/devicetree/bindings/i2c/i2c-rk3x.txt
index f0d71bc52e64..0b4a85fe2d86 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-rk3x.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-rk3x.txt
@@ -6,8 +6,8 @@ RK3xxx SoCs.
Required properties :
- reg : Offset and length of the register set for the device
- - compatible : should be "rockchip,rk3066-i2c", "rockchip,rk3188-i2c" or
- "rockchip,rk3288-i2c".
+ - compatible : should be "rockchip,rk3066-i2c", "rockchip,rk3188-i2c",
+ "rockchip,rk3228-i2c" or "rockchip,rk3288-i2c".
- interrupts : interrupt number
- clocks : parent clock
diff --git a/Documentation/devicetree/bindings/input/gpio-keys.txt b/Documentation/devicetree/bindings/input/gpio-keys.txt
index 21641236c095..a94940481e55 100644
--- a/Documentation/devicetree/bindings/input/gpio-keys.txt
+++ b/Documentation/devicetree/bindings/input/gpio-keys.txt
@@ -32,17 +32,17 @@ Optional subnode-properties:
Example nodes:
- gpio_keys {
+ gpio-keys {
compatible = "gpio-keys";
- #address-cells = <1>;
- #size-cells = <0>;
autorepeat;
- button@21 {
+
+ up {
label = "GPIO Key UP";
linux,code = <103>;
gpios = <&gpio1 0 1>;
};
- button@22 {
+
+ down {
label = "GPIO Key DOWN";
linux,code = <108>;
interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
diff --git a/Documentation/devicetree/bindings/input/touchscreen/brcm,iproc-touchscreen.txt b/Documentation/devicetree/bindings/input/touchscreen/brcm,iproc-touchscreen.txt
index 34e3382a0659..ac5dff412e25 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/brcm,iproc-touchscreen.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/brcm,iproc-touchscreen.txt
@@ -2,11 +2,17 @@
Required properties:
- compatible: must be "brcm,iproc-touchscreen"
-- reg: physical base address of the controller and length of memory mapped
- region.
+- ts_syscon: handler of syscon node defining physical base
+ address of the controller and length of memory mapped region.
+ If this property is selected please make sure MFD_SYSCON config
+ is enabled in the defconfig file.
- clocks: The clock provided by the SOC to driver the tsc
-- clock-name: name for the clock
+- clock-names: name for the clock
- interrupts: The touchscreen controller's interrupt
+- #address-cells: Specify the number of u32 entries needed in child nodes.
+ Should be set to 1.
+- #size-cells: Specify number of u32 entries needed to specify child nodes size
+ in reg property. Should be set to 1.
Optional properties:
- scanning_period: Time between scans. Each step is 1024 us. Valid 1-256.
@@ -53,13 +59,18 @@ Optional properties:
- touchscreen-inverted-x: X axis is inverted (boolean)
- touchscreen-inverted-y: Y axis is inverted (boolean)
-Example:
+Example: An example of touchscreen node
- touchscreen: tsc@0x180A6000 {
+ ts_adc_syscon: ts_adc_syscon@180a6000 {
+ compatible = "brcm,iproc-ts-adc-syscon","syscon";
+ reg = <0x180a6000 0xc30>;
+ };
+
+ touchscreen: touchscreen@180A6000 {
compatible = "brcm,iproc-touchscreen";
#address-cells = <1>;
#size-cells = <1>;
- reg = <0x180A6000 0x40>;
+ ts_syscon = <&ts_adc_syscon>;
clocks = <&adc_clk>;
clock-names = "tsc_clk";
interrupts = <GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
index 007a5b46256a..4c29cdab0ea5 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt
@@ -11,6 +11,8 @@ Main node required properties:
- interrupt-controller : Identifies the node as an interrupt controller
- #interrupt-cells : Specifies the number of cells needed to encode an
interrupt source. Must be a single cell with a value of at least 3.
+ If the system requires describing PPI affinity, then the value must
+ be at least 4.
The 1st cell is the interrupt type; 0 for SPI interrupts, 1 for PPI
interrupts. Other values are reserved for future use.
@@ -24,7 +26,14 @@ Main node required properties:
1 = edge triggered
4 = level triggered
- Cells 4 and beyond are reserved for future use and must have a value
+ The 4th cell is a phandle to a node describing a set of CPUs this
+ interrupt is affine to. The interrupt must be a PPI, and the node
+ pointed to must be a subnode of the "ppi-partitions" subnode. For
+ interrupt types other than PPI or PPIs that are not partitioned,
+ this cell must be zero. See the "ppi-partitions" node description
+ below.
+
+ Cells 5 and beyond are reserved for future use and must have a value
of 0 if present.
- reg : Specifies base physical address(s) and size of the GIC
@@ -50,6 +59,11 @@ Optional
Sub-nodes:
+PPI affinity can be expressed as a single "ppi-partitions" node,
+containing a set of sub-nodes, each with the following property:
+- affinity: Should be a list of phandles to CPU nodes (as described in
+Documentation/devicetree/bindings/arm/cpus.txt).
+
GICv3 has one or more Interrupt Translation Services (ITS) that are
used to route Message Signalled Interrupts (MSI) to the CPUs.
@@ -91,7 +105,7 @@ Examples:
gic: interrupt-controller@2c010000 {
compatible = "arm,gic-v3";
- #interrupt-cells = <3>;
+ #interrupt-cells = <4>;
#address-cells = <2>;
#size-cells = <2>;
ranges;
@@ -119,4 +133,20 @@ Examples:
#msi-cells = <1>;
reg = <0x0 0x2c400000 0 0x200000>;
};
+
+ ppi-partitions {
+ part0: interrupt-partition-0 {
+ affinity = <&cpu0 &cpu2>;
+ };
+
+ part1: interrupt-partition-1 {
+ affinity = <&cpu1 &cpu3>;
+ };
+ };
+ };
+
+
+ device@0 {
+ reg = <0 0 0 4>;
+ interrupts = <1 1 4 &part0>;
};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.txt b/Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.txt
index c9cf605bb995..2a1d16bdf834 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.txt
@@ -6,7 +6,7 @@ controllers are OR:ed together and fed to the CPU tile's IRQ input. Each
instance can handle up to 32 interrupts.
Required properties:
-- compatible: "arm,versatile-fpga-irq"
+- compatible: "arm,versatile-fpga-irq" or "oxsemi,ox810se-rps-irq"
- interrupt-controller: Identifies the node as an interrupt controller
- #interrupt-cells: The number of cells to define the interrupts. Must be 1
as the FPGA IRQ controller has no configuration options for interrupt
diff --git a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
index 2d6c8bb4d827..6428a6ba9f4a 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
@@ -71,8 +71,8 @@ Bank 1:
24: DMA8
25: DMA9
26: DMA10
-27: DMA11
-28: DMA12
+27: DMA11-14 - shared interrupt for DMA 11 to 14
+28: DMAALL - triggers on all DMA interrupts (including channel 15)
29: AUX
30: ARM
31: VPUDMA
diff --git a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm6345-l1-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm6345-l1-intc.txt
new file mode 100644
index 000000000000..4040905388d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm6345-l1-intc.txt
@@ -0,0 +1,57 @@
+Broadcom BCM6345-style Level 1 interrupt controller
+
+This block is a first level interrupt controller that is typically connected
+directly to one of the HW INT lines on each CPU.
+
+Key elements of the hardware design include:
+
+- 32, 64 or 128 incoming level IRQ lines
+
+- Most onchip peripherals are wired directly to an L1 input
+
+- A separate instance of the register set for each CPU, allowing individual
+ peripheral IRQs to be routed to any CPU
+
+- Contains one or more enable/status word pairs per CPU
+
+- No atomic set/clear operations
+
+- No polarity/level/edge settings
+
+- No FIFO or priority encoder logic; software is expected to read all
+ 2-4 status words to determine which IRQs are pending
+
+Required properties:
+
+- compatible: should be "brcm,bcm<soc>-l1-intc", "brcm,bcm6345-l1-intc"
+- reg: specifies the base physical address and size of the registers;
+ the number of supported IRQs is inferred from the size argument
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: specifies the number of cells needed to encode an interrupt
+ source, should be 1.
+- interrupt-parent: specifies the phandle to the parent interrupt controller(s)
+ this one is cascaded from
+- interrupts: specifies the interrupt line(s) in the interrupt-parent controller
+ node; valid values depend on the type of parent interrupt controller
+
+If multiple reg ranges and interrupt-parent entries are present on an SMP
+system, the driver will allow IRQ SMP affinity to be set up through the
+/proc/irq/ interface. In the simplest possible configuration, only one
+reg range and one interrupt-parent is needed.
+
+The driver operates in native CPU endian by default, there is no support for
+specifying an alternative endianness.
+
+Example:
+
+periph_intc: interrupt-controller@10000000 {
+ compatible = "brcm,bcm63168-l1-intc", "brcm,bcm6345-l1-intc";
+ reg = <0x10000020 0x20>,
+ <0x10000040 0x20>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <2>, <3>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt b/Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt
new file mode 100644
index 000000000000..888b2b9f7064
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/ezchip,nps400-ic.txt
@@ -0,0 +1,17 @@
+EZchip NPS Interrupt Controller
+
+Required properties:
+
+- compatible : should be "ezchip,nps400-ic"
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The value shall be 1.
+
+
+Example:
+
+intc: interrupt-controller {
+ compatible = "ezchip,nps400-ic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt b/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt
new file mode 100644
index 000000000000..9e389493203f
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/fsl,ls-scfg-msi.txt
@@ -0,0 +1,30 @@
+* Freescale Layerscape SCFG PCIe MSI controller
+
+Required properties:
+
+- compatible: should be "fsl,<soc-name>-msi" to identify
+ Layerscape PCIe MSI controller block such as:
+ "fsl,1s1021a-msi"
+ "fsl,1s1043a-msi"
+- msi-controller: indicates that this is a PCIe MSI controller node
+- reg: physical base address of the controller and length of the memory mapped region.
+- interrupts: an interrupt to the parent interrupt controller.
+
+Optional properties:
+- interrupt-parent: the phandle to the parent interrupt controller.
+
+This interrupt controller hardware is a second level interrupt controller that
+is hooked to a parent interrupt controller: e.g: ARM GIC for ARM-based
+platforms. If interrupt-parent is not provided, the default parent interrupt
+controller will be used.
+Each PCIe node needs to have the property msi-parent that points to
+the MSI controller node.
+
+Examples:
+
+ msi1: msi-controller@1571000 {
+ compatible = "fsl,1s1043a-msi";
+ reg = <0x0 0x1571000 0x0 0x8>;
+ msi-controller;
+ interrupts = <0 116 0x4>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/nxp,lpc3220-mic.txt b/Documentation/devicetree/bindings/interrupt-controller/nxp,lpc3220-mic.txt
index 539adca19e8f..38211f344dc8 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/nxp,lpc3220-mic.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/nxp,lpc3220-mic.txt
@@ -1,38 +1,60 @@
-* NXP LPC32xx Main Interrupt Controller
- (MIC, including SIC1 and SIC2 secondary controllers)
+* NXP LPC32xx MIC, SIC1 and SIC2 Interrupt Controllers
Required properties:
-- compatible: Should be "nxp,lpc3220-mic"
-- interrupt-controller: Identifies the node as an interrupt controller.
-- interrupt-parent: Empty for the interrupt controller itself
-- #interrupt-cells: The number of cells to define the interrupts. Should be 2.
- The first cell is the IRQ number
- The second cell is used to specify mode:
- 1 = low-to-high edge triggered
- 2 = high-to-low edge triggered
- 4 = active high level-sensitive
- 8 = active low level-sensitive
- Default for internal sources should be set to 4 (active high).
-- reg: Should contain MIC registers location and length
+- compatible: "nxp,lpc3220-mic" or "nxp,lpc3220-sic".
+- reg: should contain IC registers location and length.
+- interrupt-controller: identifies the node as an interrupt controller.
+- #interrupt-cells: the number of cells to define an interrupt, should be 2.
+ The first cell is the IRQ number, the second cell is used to specify
+ one of the supported IRQ types:
+ IRQ_TYPE_EDGE_RISING = low-to-high edge triggered,
+ IRQ_TYPE_EDGE_FALLING = high-to-low edge triggered,
+ IRQ_TYPE_LEVEL_HIGH = active high level-sensitive,
+ IRQ_TYPE_LEVEL_LOW = active low level-sensitive.
+ Reset value is IRQ_TYPE_LEVEL_LOW.
+
+Optional properties:
+- interrupt-parent: empty for MIC interrupt controller, link to parent
+ MIC interrupt controller for SIC1 and SIC2
+- interrupts: empty for MIC interrupt controller, cascaded MIC
+ hardware interrupts for SIC1 and SIC2
Examples:
- /*
- * MIC
- */
+
+ /* LPC32xx MIC, SIC1 and SIC2 interrupt controllers */
mic: interrupt-controller@40008000 {
compatible = "nxp,lpc3220-mic";
+ reg = <0x40008000 0x4000>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ sic1: interrupt-controller@4000c000 {
+ compatible = "nxp,lpc3220-sic";
+ reg = <0x4000c000 0x4000>;
interrupt-controller;
- interrupt-parent;
#interrupt-cells = <2>;
- reg = <0x40008000 0xC000>;
+
+ interrupt-parent = <&mic>;
+ interrupts = <0 IRQ_TYPE_LEVEL_LOW>,
+ <30 IRQ_TYPE_LEVEL_LOW>;
};
- /*
- * ADC
- */
+ sic2: interrupt-controller@40010000 {
+ compatible = "nxp,lpc3220-sic";
+ reg = <0x40010000 0x4000>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ interrupt-parent = <&mic>;
+ interrupts = <1 IRQ_TYPE_LEVEL_LOW>,
+ <31 IRQ_TYPE_LEVEL_LOW>;
+ };
+
+ /* ADC */
adc@40048000 {
compatible = "nxp,lpc3220-adc";
reg = <0x40048000 0x1000>;
- interrupt-parent = <&mic>;
- interrupts = <39 4>;
+ interrupt-parent = <&sic1>;
+ interrupts = <7 IRQ_TYPE_LEVEL_HIGH>;
};
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 718074501fcb..19fe6f2c83f6 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -16,6 +16,7 @@ conditions.
"arm,mmu-400"
"arm,mmu-401"
"arm,mmu-500"
+ "cavium,smmu-v2"
depending on the particular implementation and/or the
version of the architecture implemented.
diff --git a/Documentation/devicetree/bindings/leds/common.txt b/Documentation/devicetree/bindings/leds/common.txt
index 68419843e32f..af10678ea2f6 100644
--- a/Documentation/devicetree/bindings/leds/common.txt
+++ b/Documentation/devicetree/bindings/leds/common.txt
@@ -37,6 +37,9 @@ Optional properties for child nodes:
property is mandatory for the LEDs in the non-flash modes
(e.g. torch or indicator).
+- panic-indicator : This property specifies that the LED should be used,
+ if at all possible, as a panic indicator.
+
Required properties for flash LED child nodes:
- flash-max-microamp : Maximum flash LED supply current in microamperes.
- flash-max-timeout-us : Maximum timeout in microseconds after which the flash
diff --git a/Documentation/devicetree/bindings/leds/leds-gpio.txt b/Documentation/devicetree/bindings/leds/leds-gpio.txt
index fea1ebfe24a9..cbbeb1850910 100644
--- a/Documentation/devicetree/bindings/leds/leds-gpio.txt
+++ b/Documentation/devicetree/bindings/leds/leds-gpio.txt
@@ -23,6 +23,8 @@ LED sub-node properties:
property is not present.
- retain-state-suspended: (optional) The suspend state can be retained.Such
as charge-led gpio.
+- panic-indicator : (optional)
+ see Documentation/devicetree/bindings/leds/common.txt
Examples:
diff --git a/Documentation/devicetree/bindings/media/i2c/adv7180.txt b/Documentation/devicetree/bindings/media/i2c/adv7180.txt
new file mode 100644
index 000000000000..0d501154dfb2
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/adv7180.txt
@@ -0,0 +1,29 @@
+* Analog Devices ADV7180 analog video decoder family
+
+The adv7180 family devices are used to capture analog video to different
+digital interfaces like MIPI CSI-2 or parallel video.
+
+Required Properties :
+- compatible : value must be one of
+ "adi,adv7180"
+ "adi,adv7182"
+ "adi,adv7280"
+ "adi,adv7280-m"
+ "adi,adv7281"
+ "adi,adv7281-m"
+ "adi,adv7281-ma"
+ "adi,adv7282"
+ "adi,adv7282-m"
+
+Example:
+
+ i2c0@1c22000 {
+ ...
+ ...
+ adv7180@21 {
+ compatible = "adi,adv7180";
+ reg = <0x21>;
+ };
+ ...
+ };
+
diff --git a/Documentation/devicetree/bindings/media/rcar_vin.txt b/Documentation/devicetree/bindings/media/rcar_vin.txt
index 619193ccf7ff..6a4e61cbe011 100644
--- a/Documentation/devicetree/bindings/media/rcar_vin.txt
+++ b/Documentation/devicetree/bindings/media/rcar_vin.txt
@@ -5,14 +5,22 @@ The rcar_vin device provides video input capabilities for the Renesas R-Car
family of devices. The current blocks are always slaves and suppot one input
channel which can be either RGB, YUYV or BT656.
- - compatible: Must be one of the following
+ - compatible: Must be one or more of the following
- "renesas,vin-r8a7795" for the R8A7795 device
- "renesas,vin-r8a7794" for the R8A7794 device
- "renesas,vin-r8a7793" for the R8A7793 device
+ - "renesas,vin-r8a7792" for the R8A7792 device
- "renesas,vin-r8a7791" for the R8A7791 device
- "renesas,vin-r8a7790" for the R8A7790 device
- "renesas,vin-r8a7779" for the R8A7779 device
- "renesas,vin-r8a7778" for the R8A7778 device
+ - "renesas,rcar-gen2-vin" for a generic R-Car Gen2 compatible device.
+ - "renesas,rcar-gen3-vin" for a generic R-Car Gen3 compatible device.
+
+ When compatible with the generic version, nodes must list the
+ SoC-specific version corresponding to the platform first,
+ followed by the generic version.
+
- reg: the register base and size for the device registers
- interrupts: the interrupt for the device
- clocks: Reference to the parent clock
@@ -37,7 +45,7 @@ Device node example
};
vin0: vin@0xe6ef0000 {
- compatible = "renesas,vin-r8a7790";
+ compatible = "renesas,vin-r8a7790", "renesas,rcar-gen2-vin";
clocks = <&mstp8_clks R8A7790_CLK_VIN0>;
reg = <0 0xe6ef0000 0 0x1000>;
interrupts = <0 188 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Documentation/devicetree/bindings/media/xilinx/video.txt b/Documentation/devicetree/bindings/media/xilinx/video.txt
index cbd46fa0988f..68ac210e688e 100644
--- a/Documentation/devicetree/bindings/media/xilinx/video.txt
+++ b/Documentation/devicetree/bindings/media/xilinx/video.txt
@@ -20,7 +20,7 @@ The following properties are common to all Xilinx video IP cores.
- xlnx,video-format: This property represents a video format transmitted on an
AXI bus between video IP cores, using its VF code as defined in "AXI4-Stream
Video IP and System Design Guide" [UG934]. How the format relates to the IP
- core is decribed in the IP core bindings documentation.
+ core is described in the IP core bindings documentation.
- xlnx,video-width: This property qualifies the video format with the sample
width expressed as a number of bits per pixel component. All components must
diff --git a/Documentation/devicetree/bindings/memory-controllers/exynos-srom.txt b/Documentation/devicetree/bindings/memory-controllers/exynos-srom.txt
new file mode 100644
index 000000000000..f633b5d0f8ca
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/exynos-srom.txt
@@ -0,0 +1,79 @@
+SAMSUNG Exynos SoCs SROM Controller driver.
+
+Required properties:
+- compatible : Should contain "samsung,exynos4210-srom".
+
+- reg: offset and length of the register set
+
+Optional properties:
+The SROM controller can be used to attach external peripherals. In this case
+extra properties, describing the bus behind it, should be specified as below:
+
+- #address-cells: Must be set to 2 to allow device address translation.
+ Address is specified as (bank#, offset).
+
+- #size-cells: Must be set to 1 to allow device size passing
+
+- ranges: Must be set up to reflect the memory layout with four integer values
+ per bank:
+ <bank-number> 0 <parent address of bank> <size>
+
+Sub-nodes:
+The actual device nodes should be added as subnodes to the SROMc node. These
+subnodes, in addition to regular device specification, should contain the following
+properties, describing configuration of the relevant SROM bank:
+
+Required properties:
+- reg: bank number, base address (relative to start of the bank) and size of
+ the memory mapped for the device. Note that base address will be
+ typically 0 as this is the start of the bank.
+
+- samsung,srom-timing : array of 6 integers, specifying bank timings in the
+ following order: Tacp, Tcah, Tcoh, Tacc, Tcos, Tacs.
+ Each value is specified in cycles and has the following
+ meaning and valid range:
+ Tacp : Page mode access cycle at Page mode (0 - 15)
+ Tcah : Address holding time after CSn (0 - 15)
+ Tcoh : Chip selection hold on OEn (0 - 15)
+ Tacc : Access cycle (0 - 31, the actual time is N + 1)
+ Tcos : Chip selection set-up before OEn (0 - 15)
+ Tacs : Address set-up before CSn (0 - 15)
+
+Optional properties:
+- reg-io-width : data width in bytes (1 or 2). If omitted, default of 1 is used.
+
+- samsung,srom-page-mode : if page mode is set, 4 data page mode will be configured,
+ else normal (1 data) page mode will be set.
+
+Example: basic definition, no banks are configured
+ memory-controller@12570000 {
+ compatible = "samsung,exynos4210-srom";
+ reg = <0x12570000 0x14>;
+ };
+
+Example: SROMc with SMSC911x ethernet chip on bank 3
+ memory-controller@12570000 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0x04000000 0x20000 // Bank0
+ 1 0 0x05000000 0x20000 // Bank1
+ 2 0 0x06000000 0x20000 // Bank2
+ 3 0 0x07000000 0x20000>; // Bank3
+
+ compatible = "samsung,exynos4210-srom";
+ reg = <0x12570000 0x14>;
+
+ ethernet@3,0 {
+ compatible = "smsc,lan9115";
+ reg = <3 0 0x10000>; // Bank 3, offset = 0
+ phy-mode = "mii";
+ interrupt-parent = <&gpx0>;
+ interrupts = <5 8>;
+ reg-io-width = <2>;
+ smsc,irq-push-pull;
+ smsc,force-internal-phy;
+
+ samsung,srom-page-mode;
+ samsung,srom-timing = <9 12 1 9 1 1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mips/brcm/soc.txt b/Documentation/devicetree/bindings/mips/brcm/soc.txt
index 7bab90cc4a7b..4a7e030e4f9b 100644
--- a/Documentation/devicetree/bindings/mips/brcm/soc.txt
+++ b/Documentation/devicetree/bindings/mips/brcm/soc.txt
@@ -4,7 +4,8 @@ Required properties:
- compatible: "brcm,bcm3384", "brcm,bcm33843"
"brcm,bcm3384-viper", "brcm,bcm33843-viper"
- "brcm,bcm6328", "brcm,bcm6368",
+ "brcm,bcm6328", "brcm,bcm6358", "brcm,bcm6368",
+ "brcm,bcm63168", "brcm,bcm63268",
"brcm,bcm7125", "brcm,bcm7346", "brcm,bcm7358", "brcm,bcm7360",
"brcm,bcm7362", "brcm,bcm7420", "brcm,bcm7425"
diff --git a/Documentation/devicetree/bindings/mips/cavium/ciu3.txt b/Documentation/devicetree/bindings/mips/cavium/ciu3.txt
new file mode 100644
index 000000000000..616862ad2b71
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/cavium/ciu3.txt
@@ -0,0 +1,27 @@
+* Central Interrupt Unit v3
+
+Properties:
+- compatible: "cavium,octeon-7890-ciu3"
+
+ Compatibility with 78XX and 73XX SOCs.
+
+- interrupt-controller: This is an interrupt controller.
+
+- reg: The base address of the CIU's register bank.
+
+- #interrupt-cells: Must be <2>. The first cell is source number.
+ The second cell indicates the triggering semantics, and may have a
+ value of either 4 for level semantics, or 1 for edge semantics.
+
+Example:
+ interrupt-controller@1010000000000 {
+ compatible = "cavium,octeon-7890-ciu3";
+ interrupt-controller;
+ /* Interrupts are specified by two parts:
+ * 1) Source number (20 significant bits)
+ * 2) Trigger type: (4 == level, 1 == edge)
+ */
+ #address-cells = <0>;
+ #interrupt-cells = <2>;
+ reg = <0x10100 0x00000000 0x0 0xb0000000>;
+ };
diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
index c7a26ca8da12..6611a7c2053a 100644
--- a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
+++ b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
@@ -30,11 +30,90 @@ Required properties:
region may not be present in some scenarios, such
as in the device tree presented to a virtual machine.
+ - msi-parent
+ Value type: <phandle>
+ Definition: Must be present and point to the MSI controller node
+ handling message interrupts for the MC.
+
+ - ranges
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Defines the mapping between the child
+ MC address space and the parent system address space.
+
+ The MC address space is defined by 3 components:
+ <region type> <offset hi> <offset lo>
+
+ Valid values for region type are
+ 0x0 - MC portals
+ 0x1 - QBMAN portals
+
+ - #address-cells
+ Value type: <u32>
+ Definition: Must be 3. (see definition in 'ranges' property)
+
+ - #size-cells
+ Value type: <u32>
+ Definition: Must be 1.
+
+Sub-nodes:
+
+ The fsl-mc node may optionally have dpmac sub-nodes that describe
+ the relationship between the Ethernet MACs which belong to the MC
+ and the Ethernet PHYs on the system board.
+
+ The dpmac nodes must be under a node named "dpmacs" which contains
+ the following properties:
+
+ - #address-cells
+ Value type: <u32>
+ Definition: Must be present if dpmac sub-nodes are defined and must
+ have a value of 1.
+
+ - #size-cells
+ Value type: <u32>
+ Definition: Must be present if dpmac sub-nodes are defined and must
+ have a value of 0.
+
+ These nodes must have the following properties:
+
+ - compatible
+ Value type: <string>
+ Definition: Must be "fsl,qoriq-mc-dpmac".
+
+ - reg
+ Value type: <prop-encoded-array>
+ Definition: Specifies the id of the dpmac.
+
+ - phy-handle
+ Value type: <phandle>
+ Definition: Specifies the phandle to the PHY device node associated
+ with this dpmac.
+
Example:
fsl_mc: fsl-mc@80c000000 {
compatible = "fsl,qoriq-mc";
reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */
<0x00000000 0x08340000 0 0x40000>; /* MC control reg */
- };
+ msi-parent = <&its>;
+ #address-cells = <3>;
+ #size-cells = <1>;
+
+ /*
+ * Region type 0x0 - MC portals
+ * Region type 0x1 - QBMAN portals
+ */
+ ranges = <0x0 0x0 0x0 0x8 0x0c000000 0x4000000
+ 0x1 0x0 0x0 0x8 0x18000000 0x8000000>;
+ dpmacs {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ dpmac@1 {
+ compatible = "fsl,qoriq-mc-dpmac";
+ reg = <1>;
+ phy-handle = <&mdio0_phy0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
index ea5614b6f613..07184e8f894e 100644
--- a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
@@ -15,6 +15,7 @@ Required Properties:
- "rockchip,rk3288-dw-mshc": for Rockchip RK3288
- "rockchip,rk3036-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3036
- "rockchip,rk3368-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3368
+ - "rockchip,rk3399-dw-mshc", "rockchip,rk3288-dw-mshc": for Rockchip RK3399
Optional Properties:
* clocks: from common clock binding: if ciu_drive and ciu_sample are
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-st.txt b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
index 18d950df2749..88faa91125bf 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-st.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
@@ -38,7 +38,7 @@ Optional properties:
- bus-width: Number of data lines.
See: Documentation/devicetree/bindings/mmc/mmc.txt.
-- max-frequency: Can be 200MHz, 100Mz or 50MHz (default) and used for
+- max-frequency: Can be 200MHz, 100MHz or 50MHz (default) and used for
configuring the CCONFIG3 in the mmcss.
See: Documentation/devicetree/bindings/mmc/mmc.txt.
@@ -48,7 +48,7 @@ Optional properties:
- vqmmc-supply: Phandle to the regulator dt node, mentioned as the vcc/vdd
supply in eMMC/SD specs.
-- sd-uhs--sdr50: To enable the SDR50 in the mmcss.
+- sd-uhs-sdr50: To enable the SDR50 in the mmcss.
See: Documentation/devicetree/bindings/mmc/mmc.txt.
- sd-uhs-sdr104: To enable the SDR104 in the mmcss.
diff --git a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
index 7fb746dd1a68..0f610d4b5b00 100644
--- a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
@@ -26,3 +26,6 @@ Required properties:
Optional properties:
- toshiba,mmc-wrprotect-disable: write-protect detection is unavailable
+- pinctrl-names: should be "default", "state_uhs"
+- pinctrl-0: should contain default/high speed pin ctrl
+- pinctrl-1: should contain uhs mode pin ctrl
diff --git a/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt b/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
index 8babdaa8623b..6d1b7971d078 100644
--- a/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
+++ b/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
@@ -12,6 +12,12 @@ Optional properties:
- vmmc-supply: a phandle of a regulator, supplying Vcc to the card
- vqmmc-supply: a phandle of a regulator, supplying VccQ to the card
+- pinctrl-names: Can contain a "default" entry and a "state_uhs"
+ entry. The state_uhs entry is used together with the default
+ entry when the board requires distinct settings for UHS speeds.
+
+- pinctrl-N: One property for each name listed in pinctrl-names, see
+ ../pinctrl/pinctrl-bindings.txt.
Additionally any standard mmc bindings from mmc.txt can be used.
diff --git a/Documentation/devicetree/bindings/mtd/arm-versatile.txt b/Documentation/devicetree/bindings/mtd/arm-versatile.txt
index beace4b89daa..4ec28796a3c0 100644
--- a/Documentation/devicetree/bindings/mtd/arm-versatile.txt
+++ b/Documentation/devicetree/bindings/mtd/arm-versatile.txt
@@ -1,8 +1,26 @@
Flash device on ARM Versatile board
+These flash chips are found in the ARM reference designs like Integrator,
+Versatile, RealView, Versatile Express etc.
+
+They are regular CFI compatible (Intel or AMD extended) flash chips with
+some special write protect/VPP bits that can be controlled by the machine's
+system controller.
+
Required properties:
-- compatible : must be "arm,versatile-flash";
+- compatible : must be "arm,versatile-flash", "cfi-flash";
+- reg : memory address for the flash chip
- bank-width : width in bytes of flash interface.
+For the rest of the properties, see mtd-physmap.txt.
+
The device tree may optionally contain sub-nodes describing partitions of the
address space. See partition.txt for more detail.
+
+Example:
+
+flash@34000000 {
+ compatible = "arm,versatile-flash", "cfi-flash";
+ reg = <0x34000000 0x4000000>;
+ bank-width = <4>;
+};
diff --git a/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt
index 0333ec87dc49..c34aa6f8a424 100644
--- a/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt
+++ b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt
@@ -5,7 +5,8 @@ Required properties:
"fsl,imx7d-qspi", "fsl,imx6ul-qspi",
"fsl,ls1021a-qspi"
or
- "fsl,ls2080a-qspi" followed by "fsl,ls1021a-qspi"
+ "fsl,ls2080a-qspi" followed by "fsl,ls1021a-qspi",
+ "fsl,ls1043a-qspi" followed by "fsl,ls1021a-qspi"
- reg : the first contains the register location and length,
the second contains the memory mapping address and length
- reg-names: Should contain the reg names "QuadSPI" and "QuadSPI-memory"
diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
index 078060a97f95..05f705e32a4a 100644
--- a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
+++ b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
@@ -18,6 +18,8 @@ Required properties for all the ethernet interfaces:
- First is the Rx interrupt. This irq is mandatory.
- Second is the Tx completion interrupt.
This is supported only on SGMII based 1GbE and 10GbE interfaces.
+- channel: Ethernet to CPU, start channel (prefetch buffer) number
+ - Must map to the first irq and irqs must be sequential
- port-id: Port number (0 or 1)
- clocks: Reference to the clock entry.
- local-mac-address: MAC address assigned to this device
diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt
index 28a4781ab6d7..0ae06491b430 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -45,13 +45,13 @@ Required properties:
Optional properties:
- dual_emac_res_vlan : Specifies VID to be used to segregate the ports
- mac-address : See ethernet.txt file in the same directory
-- phy_id : Specifies slave phy id
+- phy_id : Specifies slave phy id (deprecated, use phy-handle)
- phy-handle : See ethernet.txt file in the same directory
Slave sub-nodes:
- fixed-link : See fixed-link.txt file in the same directory
- Either the property phy_id, or the sub-node
- fixed-link can be specified
+
+Note: Exactly one of phy_id, phy-handle, or fixed-link must be specified.
Note: "ti,hwmods" field is used to fetch the base address and irq
resources from TI, omap hwmod data base during device registration.
diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt
index 5fdbbcdf8c4b..9f4807f90c31 100644
--- a/Documentation/devicetree/bindings/net/dsa/dsa.txt
+++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt
@@ -31,8 +31,6 @@ A switch child node has the following optional property:
switch. Must be set if the switch can not detect
the presence and/or size of a connected EEPROM,
otherwise optional.
-- reset-gpios : phandle and specifier to a gpio line connected to
- reset pin of the switch chip.
A switch may have multiple "port" children nodes
diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt
new file mode 100644
index 000000000000..7629189398aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/dsa/marvell.txt
@@ -0,0 +1,35 @@
+Marvell DSA Switch Device Tree Bindings
+---------------------------------------
+
+WARNING: This binding is currently unstable. Do not program it into a
+FLASH never to be changed again. Once this binding is stable, this
+warning will be removed.
+
+If you need a stable binding, use the old dsa.txt binding.
+
+Marvell Switches are MDIO devices. The following properties should be
+placed as a child node of an mdio device.
+
+The properties described here are those specific to Marvell devices.
+Additional required and optional properties can be found in dsa.txt.
+
+Required properties:
+- compatible : Should be "marvell,mv88e6085".
+- reg : Address on the MII bus for the switch.
+
+Optional properties:
+
+- reset-gpios : Should be a gpio specifier for a reset line
+
+Example:
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switch0: switch@0 {
+ compatible = "marvell,mv88e6085";
+ reg = <0>;
+ reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
index ecacfa44b1eb..d4b7f2e49984 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
@@ -7,19 +7,45 @@ Required properties:
- mode: dsa fabric mode string. only support one of dsaf modes like these:
"2port-64vf",
"6port-16rss",
- "6port-16vf".
+ "6port-16vf",
+ "single-port".
- interrupt-parent: the interrupt parent of this device.
- interrupts: should contain the DSA Fabric and rcb interrupt.
- reg: specifies base physical address(es) and size of the device registers.
- The first region is external interface control register base and size.
- The second region is SerDes base register and size.
+ The first region is external interface control register base and size(optional,
+ only used when subctrl-syscon does not exist). It is recommended using
+ subctrl-syscon rather than this address.
+ The second region is SerDes base register and size(optional, only used when
+ serdes-syscon in port node does not exist). It is recommended using
+ serdes-syscon rather than this address.
The third region is the PPE register base and size.
- The fourth region is dsa fabric base register and size.
- The fifth region is cpld base register and size, it is not required if do not use cpld.
-- phy-handle: phy handle of physicl port, 0 if not any phy device. see ethernet.txt [1].
+ The fourth region is dsa fabric base register and size. It is not required for
+ single-port mode.
+- reg-names: may be ppe-base and(or) dsaf-base. It is used to find the
+ corresponding reg's index.
+
+- phy-handle: phy handle of physical port, 0 if not any phy device. It is optional
+ attribute. If port node exists, phy-handle in each port node will be used.
+ see ethernet.txt [1].
+- subctrl-syscon: is syscon handle for external interface control register.
+- reset-field-offset: is offset of reset field. Its value depends on the hardware
+ user manual.
- buf-size: rx buffer size, should be 16-1024.
- desc-num: number of descriptors in TX and RX queue, should be 512, 1024, 2048 or 4096.
+- port: subnodes of dsaf. A dsaf node may contain several port nodes (depending
+ on mode of dsaf). Each port node contains the attributes listed below:
+- reg: is physical port index in one dsaf.
+- phy-handle: phy handle of physical port. It is not required if there isn't
+ phy device. see ethernet.txt [1].
+- serdes-syscon: is syscon handle for SerDes register.
+- cpld-syscon: is syscon handle + register offset pair for cpld register. It is
+ not required if there isn't cpld device.
+- port-rst-offset: is offset of reset field for each port in dsaf. Its value
+ depends on the hardware user manual.
+- port-mode-offset: is offset of port mode field for each port in dsaf. Its
+ value depends on the hardware user manual.
+
[1] Documentation/devicetree/bindings/net/phy.txt
Example:
@@ -28,11 +54,11 @@ dsaf0: dsa@c7000000 {
compatible = "hisilicon,hns-dsaf-v1";
mode = "6port-16rss";
interrupt-parent = <&mbigen_dsa>;
- reg = <0x0 0xC0000000 0x0 0x420000
- 0x0 0xC2000000 0x0 0x300000
- 0x0 0xc5000000 0x0 0x890000
+ reg = <0x0 0xc5000000 0x0 0x890000
0x0 0xc7000000 0x0 0x60000>;
- phy-handle = <0 0 0 0 &soc0_phy4 &soc0_phy5 0 0>;
+ reg-names = "ppe-base", "dsaf-base";
+ subctrl-syscon = <&subctrl>;
+ reset-field-offset = <0>;
interrupts = <131 4>,<132 4>, <133 4>,<134 4>,
<135 4>,<136 4>, <137 4>,<138 4>,
<139 4>,<140 4>, <141 4>,<142 4>,
@@ -43,4 +69,15 @@ dsaf0: dsa@c7000000 {
buf-size = <4096>;
desc-num = <1024>;
dma-coherent;
+
+ port@0 {
+ reg = <0>;
+ phy-handle = <&phy0>;
+ serdes-syscon = <&serdes>;
+ };
+
+ port@1 {
+ reg = <1>;
+ serdes-syscon = <&serdes>;
+ };
};
diff --git a/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt b/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt
index e6a9d1c30878..b9ff4ba6454e 100644
--- a/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt
+++ b/Documentation/devicetree/bindings/net/hisilicon-hns-nic.txt
@@ -36,6 +36,34 @@ Required properties:
| | | | | |
external port
+ This attribute is retained for compatibility purposes. It is not recommended to
+ use it in new code.
+
+- port-idx-in-ae: is the index of port provided by AE.
+ In NIC mode of DSAF, all 6 PHYs of service DSAF are taken as ethernet ports
+ to the CPU. The port-idx-in-ae can be 0 to 5. Here is the diagram:
+ +-----+---------------+
+ | CPU |
+ +-+-+-+---+-+-+-+-+-+-+
+ | | | | | | | |
+ debug debug service
+ port port port
+ (0) (0) (0-5)
+
+ In Switch mode of DSAF, all 6 PHYs of service DSAF are taken as physical
+ ports connected to a LAN Switch while the CPU side assumes it has one
+ single NIC connected to this switch. In this case, the port-idx-in-ae
+ will be 0 only.
+ +-----+-----+------+------+
+ | CPU |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | | service| port(0)
+ debug debug +------------+
+ port port | switch |
+ (0) (0) +-+-+-+-+-+-++
+ | | | | | |
+ external port
+
- local-mac-address: mac addr of the ethernet interface
Example:
@@ -43,6 +71,6 @@ Example:
ethernet@0{
compatible = "hisilicon,hns-nic-v1";
ae-handle = <&dsaf0>;
- port-id = <0>;
+ port-idx-in-ae = <0>;
local-mac-address = [a2 14 e4 4b 56 76];
};
diff --git a/Documentation/devicetree/bindings/net/marvell-bt-sd8xxx.txt b/Documentation/devicetree/bindings/net/marvell-bt-sd8xxx.txt
new file mode 100644
index 000000000000..14aa6cf58201
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/marvell-bt-sd8xxx.txt
@@ -0,0 +1,56 @@
+Marvell 8897/8997 (sd8897/sd8997) bluetooth SDIO devices
+------
+
+Required properties:
+
+ - compatible : should be one of the following:
+ * "marvell,sd8897-bt"
+ * "marvell,sd8997-bt"
+
+Optional properties:
+
+ - marvell,cal-data: Calibration data downloaded to the device during
+ initialization. This is an array of 28 values(u8).
+
+ - marvell,wakeup-pin: It represents wakeup pin number of the bluetooth chip.
+ firmware will use the pin to wakeup host system.
+ - marvell,wakeup-gap-ms: wakeup gap represents wakeup latency of the host
+ platform. The value will be configured to firmware. This
+ is needed for the chip's sleep feature to work as expected.
+ - interrupt-parent: phandle of the parent interrupt controller
+ - interrupts : interrupt pin number to the cpu. Driver will request an irq based
+ on this interrupt number. During system suspend, the irq will be
+ enabled so that the bluetooth chip can wakeup host platform under
+ certain condition. During system resume, the irq will be disabled
+ to make sure unnecessary interrupt is not received.
+
+Example:
+
+IRQ pin 119 is used as system wakeup source interrupt.
+wakeup pin 13 and gap 100ms are configured so that firmware can wakeup host
+using this device side pin and wakeup latency.
+calibration data is also available in below example.
+
+&mmc3 {
+ status = "okay";
+ vmmc-supply = <&wlan_en_reg>;
+ bus-width = <4>;
+ cap-power-off-card;
+ keep-power-in-suspend;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ btmrvl: bluetooth@2 {
+ compatible = "marvell,sd8897-bt";
+ reg = <2>;
+ interrupt-parent = <&pio>;
+ interrupts = <119 IRQ_TYPE_LEVEL_LOW>;
+
+ marvell,cal-data = /bits/ 8 <
+ 0x37 0x01 0x1c 0x00 0xff 0xff 0xff 0xff 0x01 0x7f 0x04 0x02
+ 0x00 0x00 0xba 0xce 0xc0 0xc6 0x2d 0x00 0x00 0x00 0x00 0x00
+ 0x00 0x00 0xf0 0x00>;
+ marvell,wakeup-pin = <0x0d>;
+ marvell,wakeup-gap-ms = <0x64>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt
index 5ca79290eabf..32eaaca04d9b 100644
--- a/Documentation/devicetree/bindings/net/mediatek-net.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-net.txt
@@ -9,7 +9,8 @@ have dual GMAC each represented by a child node..
Required properties:
- compatible: Should be "mediatek,mt7623-eth"
- reg: Address and length of the register set for the device
-- interrupts: Should contain the frame engines interrupt
+- interrupts: Should contain the three frame engines interrupts in numeric
+ order. These are fe_int0, fe_int1 and fe_int2.
- clocks: the clock used by the core
- clock-names: the names of the clock listed in the clocks property. These are
"ethif", "esw", "gp2", "gp1"
@@ -42,7 +43,9 @@ eth: ethernet@1b100000 {
<&ethsys CLK_ETHSYS_GP2>,
<&ethsys CLK_ETHSYS_GP1>;
clock-names = "ethif", "esw", "gp2", "gp1";
- interrupts = <GIC_SPI 200 IRQ_TYPE_LEVEL_LOW>;
+ interrupts = <GIC_SPI 200 IRQ_TYPE_LEVEL_LOW
+ GIC_SPI 199 IRQ_TYPE_LEVEL_LOW
+ GIC_SPI 198 IRQ_TYPE_LEVEL_LOW>;
power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
resets = <&ethsys MT2701_ETHSYS_ETH_RST>;
reset-names = "eth";
diff --git a/Documentation/devicetree/bindings/net/microchip,enc28j60.txt b/Documentation/devicetree/bindings/net/microchip,enc28j60.txt
new file mode 100644
index 000000000000..1dc3bc75539d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/microchip,enc28j60.txt
@@ -0,0 +1,59 @@
+* Microchip ENC28J60
+
+This is a standalone 10 MBit ethernet controller with SPI interface.
+
+For each device connected to a SPI bus, define a child node within
+the SPI master node.
+
+Required properties:
+- compatible: Should be "microchip,enc28j60"
+- reg: Specify the SPI chip select the ENC28J60 is wired to
+- interrupt-parent: Specify the phandle of the source interrupt, see interrupt
+ binding documentation for details. Usually this is the GPIO bank
+ the interrupt line is wired to.
+- interrupts: Specify the interrupt index within the interrupt controller (referred
+ to above in interrupt-parent) and interrupt type. The ENC28J60 natively
+ generates falling edge interrupts, however, additional board logic
+ might invert the signal.
+- pinctrl-names: List of assigned state names, see pinctrl binding documentation.
+- pinctrl-0: List of phandles to configure the GPIO pin used as interrupt line,
+ see also generic and your platform specific pinctrl binding
+ documentation.
+
+Optional properties:
+- spi-max-frequency: Maximum frequency of the SPI bus when accessing the ENC28J60.
+ According to the ENC28J60 datasheet, the chip allows a maximum of 20 MHz, however,
+ board designs may need to limit this value.
+- local-mac-address: See ethernet.txt in the same directory.
+
+
+Example (for NXP i.MX28 with pin control stuff for GPIO irq):
+
+ ssp2: ssp@80014000 {
+ compatible = "fsl,imx28-spi";
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi2_pins_b &spi2_sck_cfg>;
+ status = "okay";
+
+ enc28j60: ethernet@0 {
+ compatible = "microchip,enc28j60";
+ pinctrl-names = "default";
+ pinctrl-0 = <&enc28j60_pins>;
+ reg = <0>;
+ interrupt-parent = <&gpio3>;
+ interrupts = <3 IRQ_TYPE_EDGE_FALLING>;
+ spi-max-frequency = <12000000>;
+ };
+ };
+
+ pinctrl@80018000 {
+ enc28j60_pins: enc28j60_pins@0 {
+ reg = <0>;
+ fsl,pinmux-ids = <
+ MX28_PAD_AUART0_RTS__GPIO_3_3 /* Interrupt */
+ >;
+ fsl,drive-strength = <MXS_DRIVE_4mA>;
+ fsl,voltage = <MXS_VOLTAGE_HIGH>;
+ fsl,pull-up = <MXS_PULL_DISABLE>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt b/Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt
new file mode 100644
index 000000000000..1aea822d4530
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nfc/pn533-i2c.txt
@@ -0,0 +1,31 @@
+* NXP Semiconductors PN532 NFC Controller
+
+Required properties:
+- compatible: Should be "nxp,pn532-i2c" or "nxp,pn533-i2c".
+- clock-frequency: I²C work frequency.
+- reg: address on the bus
+- interrupt-parent: phandle for the interrupt gpio controller
+- interrupts: GPIO interrupt to which the chip is connected
+
+Optional SoC Specific Properties:
+- pinctrl-names: Contains only one value - "default".
+- pinctrl-0: Specifies the pin control groups used for this controller.
+
+Example (for ARM-based BeagleBone with PN532 on I2C2):
+
+&i2c2 {
+
+ status = "okay";
+
+ pn532: pn532@24 {
+
+ compatible = "nxp,pn532-i2c";
+
+ reg = <0x24>;
+ clock-frequency = <400000>;
+
+ interrupt-parent = <&gpio1>;
+ interrupts = <17 IRQ_TYPE_EDGE_FALLING>;
+
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt
index bc1c3c8bf8fa..c00a9a894547 100644
--- a/Documentation/devicetree/bindings/net/phy.txt
+++ b/Documentation/devicetree/bindings/net/phy.txt
@@ -35,6 +35,8 @@ Optional Properties:
- broken-turn-around: If set, indicates the PHY device does not correctly
release the turn around line low at the end of a MDIO transaction.
+- reset-gpios: Reference to a GPIO used to reset the phy.
+
Example:
ethernet-phy@0 {
@@ -42,4 +44,5 @@ ethernet-phy@0 {
interrupt-parent = <40000>;
interrupts = <35 1>;
reg = <0>;
+ reset-gpios = <&gpio1 17 GPIO_ACTIVE_LOW>;
};
diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt
index 6605d19601c2..4d302db657c0 100644
--- a/Documentation/devicetree/bindings/net/stmmac.txt
+++ b/Documentation/devicetree/bindings/net/stmmac.txt
@@ -59,6 +59,8 @@ Optional properties:
- snps,fb: fixed-burst
- snps,mb: mixed-burst
- snps,rb: rebuild INCRx Burst
+ - snps,tso: this enables the TSO feature otherwise it will be managed by
+ MAC HW capability register.
- mdio: with compatible = "snps,dwmac-mdio", create and register mdio bus.
Examples:
diff --git a/Documentation/devicetree/bindings/net/wireless/marvell-sd8xxx.txt b/Documentation/devicetree/bindings/net/wireless/marvell-sd8xxx.txt
new file mode 100644
index 000000000000..c421aba0a5bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/wireless/marvell-sd8xxx.txt
@@ -0,0 +1,63 @@
+Marvell 8897/8997 (sd8897/sd8997) SDIO devices
+------
+
+This node provides properties for controlling the marvell sdio wireless device.
+The node is expected to be specified as a child node to the SDIO controller that
+connects the device to the system.
+
+Required properties:
+
+ - compatible : should be one of the following:
+ * "marvell,sd8897"
+ * "marvell,sd8997"
+
+Optional properties:
+
+ - marvell,caldata* : A series of properties with marvell,caldata prefix,
+ represent calibration data downloaded to the device during
+ initialization. This is an array of unsigned 8-bit values.
+ the properties should follow below property name and
+ corresponding array length:
+ "marvell,caldata-txpwrlimit-2g" (length = 566).
+ "marvell,caldata-txpwrlimit-5g-sub0" (length = 502).
+ "marvell,caldata-txpwrlimit-5g-sub1" (length = 688).
+ "marvell,caldata-txpwrlimit-5g-sub2" (length = 750).
+ "marvell,caldata-txpwrlimit-5g-sub3" (length = 502).
+ - marvell,wakeup-pin : a wakeup pin number of wifi chip which will be configured
+ to firmware. Firmware will wakeup the host using this pin
+ during suspend/resume.
+ - interrupt-parent: phandle of the parent interrupt controller
+ - interrupts : interrupt pin number to the cpu. driver will request an irq based on
+ this interrupt number. during system suspend, the irq will be enabled
+ so that the wifi chip can wakeup host platform under certain condition.
+ during system resume, the irq will be disabled to make sure
+ unnecessary interrupt is not received.
+
+Example:
+
+Tx power limit calibration data is configured in below example.
+The calibration data is an array of unsigned values, the length
+can vary between hw versions.
+IRQ pin 38 is used as system wakeup source interrupt. wakeup pin 3 is configured
+so that firmware can wakeup host using this device side pin.
+
+&mmc3 {
+ status = "okay";
+ vmmc-supply = <&wlan_en_reg>;
+ bus-width = <4>;
+ cap-power-off-card;
+ keep-power-in-suspend;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ mwifiex: wifi@1 {
+ compatible = "marvell,sd8897";
+ reg = <1>;
+ interrupt-parent = <&pio>;
+ interrupts = <38 IRQ_TYPE_LEVEL_LOW>;
+
+ marvell,caldata-txpwrlimit-2g = /bits/ 8 <
+ 0x01 0x00 0x06 0x00 0x08 0x02 0x89 0x01>;
+ marvell,wakeup-pin = <3>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
index 96aae6b4f736..74d7f0af209c 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
@@ -5,12 +5,18 @@ Required properties:
* "qcom,ath10k"
* "qcom,ipq4019-wifi"
-PCI based devices uses compatible string "qcom,ath10k" and takes only
-calibration data via "qcom,ath10k-calibration-data". Rest of the properties
-are not applicable for PCI based devices.
+PCI based devices use compatible string "qcom,ath10k" and take calibration
+data along with board specific data via "qcom,ath10k-calibration-data".
+Rest of the properties are not applicable for PCI based devices.
AHB based devices (i.e. ipq4019) use compatible string "qcom,ipq4019-wifi"
-and also uses most of the properties defined in this doc.
+and also use most of the properties defined in this doc (except
+"qcom,ath10k-calibration-data"). It uses "qcom,ath10k-pre-calibration-data"
+to carry pre calibration data.
+
+In general, entry "qcom,ath10k-pre-calibration-data" and
+"qcom,ath10k-calibration-data" conflict with each other and only one
+can be provided per device.
Optional properties:
- reg: Address and length of the register set for the device.
@@ -35,8 +41,11 @@ Optional properties:
- qcom,msi_addr: MSI interrupt address.
- qcom,msi_base: Base value to add before writing MSI data into
MSI address register.
-- qcom,ath10k-calibration-data : calibration data as an array, the
- length can vary between hw versions
+- qcom,ath10k-calibration-data : calibration data + board specific data
+ as an array, the length can vary between
+ hw versions.
+- qcom,ath10k-pre-calibration-data : pre calibration data as an array,
+ the length can vary between hw versions.
Example (to supply the calibration data alone):
@@ -105,5 +114,5 @@ wifi0: wifi@a000000 {
"legacy";
qcom,msi_addr = <0x0b006040>;
qcom,msi_base = <0x40>;
- qcom,ath10k-calibration-data = [ 01 02 03 ... ];
+ qcom,ath10k-pre-calibration-data = [ 01 02 03 ... ];
};
diff --git a/Documentation/devicetree/bindings/numa.txt b/Documentation/devicetree/bindings/numa.txt
new file mode 100644
index 000000000000..21b35053ca5a
--- /dev/null
+++ b/Documentation/devicetree/bindings/numa.txt
@@ -0,0 +1,275 @@
+==============================================================================
+NUMA binding description.
+==============================================================================
+
+==============================================================================
+1 - Introduction
+==============================================================================
+
+Systems employing a Non Uniform Memory Access (NUMA) architecture contain
+collections of hardware resources including processors, memory, and I/O buses,
+that comprise what is commonly known as a NUMA node.
+Processor accesses to memory within the local NUMA node are generally faster
+than processor accesses to memory outside of the local NUMA node.
+DT defines interfaces that allow the platform to convey NUMA node
+topology information to OS.
+
+==============================================================================
+2 - numa-node-id
+==============================================================================
+
+For the purpose of identification, each NUMA node is associated with a unique
+token known as a node id. For the purpose of this binding
+a node id is a 32-bit integer.
+
+A device node is associated with a NUMA node by the presence of a
+numa-node-id property which contains the node id of the device.
+
+Example:
+ /* numa node 0 */
+ numa-node-id = <0>;
+
+ /* numa node 1 */
+ numa-node-id = <1>;
+
+==============================================================================
+3 - distance-map
+==============================================================================
+
+The optional device tree node distance-map describes the relative
+distance (memory latency) between all numa nodes.
+
+- compatible : Should at least contain "numa-distance-map-v1".
+
+- distance-matrix
+ This property defines a matrix to describe the relative distances
+ between all numa nodes.
+ It is represented as a list of node pairs and their relative distance.
+
+ Note:
+ 1. Each entry represents distance from first node to second node.
+ The distances are equal in either direction.
+ 2. The distance from a node to self (local distance) is represented
+ with value 10 and all internode distance should be represented with
+ a value greater than 10.
+ 3. distance-matrix should have entries in lexicographical ascending
+ order of nodes.
+ 4. There must be only one device node distance-map which must
+ reside in the root node.
+ 5. If the distance-map node is not present, a default
+ distance-matrix is used.
+
+Example:
+ 4 nodes connected in mesh/ring topology as below,
+
+ 0_______20______1
+ | |
+ | |
+ 20 20
+ | |
+ | |
+ |_______________|
+ 3 20 2
+
+ if relative distance for each hop is 20,
+ then internode distance would be,
+ 0 -> 1 = 20
+ 1 -> 2 = 20
+ 2 -> 3 = 20
+ 3 -> 0 = 20
+ 0 -> 2 = 40
+ 1 -> 3 = 40
+
+ and dt presentation for this distance matrix is,
+
+ distance-map {
+ compatible = "numa-distance-map-v1";
+ distance-matrix = <0 0 10>,
+ <0 1 20>,
+ <0 2 40>,
+ <0 3 20>,
+ <1 0 20>,
+ <1 1 10>,
+ <1 2 20>,
+ <1 3 40>,
+ <2 0 40>,
+ <2 1 20>,
+ <2 2 10>,
+ <2 3 20>,
+ <3 0 20>,
+ <3 1 40>,
+ <3 2 20>,
+ <3 3 10>;
+ };
+
+==============================================================================
+4 - Example dts
+==============================================================================
+
+Dual socket system consists of 2 boards connected through ccn bus and
+each board having one socket/soc of 8 cpus, memory and pci bus.
+
+ memory@c00000 {
+ device_type = "memory";
+ reg = <0x0 0xc00000 0x0 0x80000000>;
+ /* node 0 */
+ numa-node-id = <0>;
+ };
+
+ memory@10000000000 {
+ device_type = "memory";
+ reg = <0x100 0x0 0x0 0x80000000>;
+ /* node 1 */
+ numa-node-id = <1>;
+ };
+
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ /* node 0 */
+ numa-node-id = <0>;
+ };
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x2>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x3>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@4 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x4>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@5 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x5>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@6 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x6>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@7 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x7>;
+ enable-method = "psci";
+ numa-node-id = <0>;
+ };
+ cpu@8 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x8>;
+ enable-method = "psci";
+ /* node 1 */
+ numa-node-id = <1>;
+ };
+ cpu@9 {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0x9>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@a {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xa>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@b {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xb>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@c {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xc>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@d {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xd>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@e {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xe>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ cpu@f {
+ device_type = "cpu";
+ compatible = "arm,armv8";
+ reg = <0x0 0xf>;
+ enable-method = "psci";
+ numa-node-id = <1>;
+ };
+ };
+
+ pcie0: pcie0@848000000000 {
+ compatible = "arm,armv8";
+ device_type = "pci";
+ bus-range = <0 255>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = <0x8480 0x00000000 0 0x10000000>; /* Configuration space */
+ ranges = <0x03000000 0x8010 0x00000000 0x8010 0x00000000 0x70 0x00000000>;
+ /* node 0 */
+ numa-node-id = <0>;
+ };
+
+ pcie1: pcie1@948000000000 {
+ compatible = "arm,armv8";
+ device_type = "pci";
+ bus-range = <0 255>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = <0x9480 0x00000000 0 0x10000000>; /* Configuration space */
+ ranges = <0x03000000 0x9010 0x00000000 0x9010 0x00000000 0x70 0x00000000>;
+ /* node 1 */
+ numa-node-id = <1>;
+ };
+
+ distance-map {
+ compatible = "numa-distance-map-v1";
+ distance-matrix = <0 0 10>,
+ <0 1 20>,
+ <1 1 10>;
+ };
diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
index 3be80c68941a..83aeb1f5a645 100644
--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
@@ -4,8 +4,8 @@ This PCIe host controller is based on the Synopsis Designware PCIe IP
and thus inherits all the common properties defined in designware-pcie.txt.
Required properties:
-- compatible: "fsl,imx6q-pcie"
-- reg: base addresse and length of the pcie controller
+- compatible: "fsl,imx6q-pcie", "fsl,imx6sx-pcie", "fsl,imx6qp-pcie"
+- reg: base address and length of the PCIe controller
- interrupts: A list of interrupt outputs of the controller. Must contain an
entry for each entry in the interrupt-names property.
- interrupt-names: Must include the following entries:
@@ -19,6 +19,20 @@ Optional properties:
- fsl,tx-deemph-gen2-6db: Gen2 (6db) De-emphasis value. Default: 20
- fsl,tx-swing-full: Gen2 TX SWING FULL value. Default: 127
- fsl,tx-swing-low: TX launch amplitude swing_low value. Default: 127
+- fsl,max-link-speed: Specify PCI gen for link capability. Must be '2' for
+ gen2, otherwise will default to gen1. Note that the IMX6 LVDS clock outputs
+ do not meet gen2 jitter requirements and thus for gen2 capability a gen2
+ compliant clock generator should be used and configured.
+- reset-gpio: Should specify the GPIO for controlling the PCI bus device reset
+ signal. It's not polarity aware and defaults to active-low reset sequence
+ (L=reset state, H=operation state).
+- reset-gpio-active-high: If present then the reset sequence using the GPIO
+ specified in the "reset-gpio" property is reversed (H=reset state,
+ L=operation state).
+
+Additional required properties for imx6sx-pcie:
+- clock-names: Must include the following additional entries:
+ - "pcie_inbound_axi"
Example:
diff --git a/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt b/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
index 75321ae23c08..b8cc395fffea 100644
--- a/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
@@ -60,11 +60,14 @@ Required properties:
- afi
- pcie_x
-Required properties on Tegra124 and later:
+Required properties on Tegra124 and later (deprecated):
- phys: Must contain an entry for each entry in phy-names.
- phy-names: Must include the following entries:
- pcie
+These properties are deprecated in favour of per-lane PHYs defined in each of
+the root ports (see below).
+
Power supplies for Tegra20:
- avdd-pex-supply: Power supply for analog PCIe logic. Must supply 1.05 V.
- vdd-pex-supply: Power supply for digital PCIe I/O. Must supply 1.05 V.
@@ -122,11 +125,22 @@ Required properties:
- Root port 0 uses 4 lanes, root port 1 is unused.
- Both root ports use 2 lanes.
-Example:
+Required properties for Tegra124 and later:
+- phys: Must contain a phandle to a PHY for each entry in phy-names.
+- phy-names: Must include an entry for each active lane. Note that the number
+ of entries does not have to (though usually will) be equal to the specified
+ number of lanes in the nvidia,num-lanes property. Entries are of the form
+ "pcie-N": where N ranges from 0 to the value specified in nvidia,num-lanes.
+
+Examples:
+=========
+
+Tegra20:
+--------
SoC DTSI:
- pcie-controller {
+ pcie-controller@80003000 {
compatible = "nvidia,tegra20-pcie";
device_type = "pci";
reg = <0x80003000 0x00000800 /* PADS registers */
@@ -186,10 +200,9 @@ SoC DTSI:
};
};
-
Board DTS:
- pcie-controller {
+ pcie-controller@80003000 {
status = "okay";
vdd-supply = <&pci_vdd_reg>;
@@ -222,3 +235,204 @@ if a device on the PCI bus provides a non-probeable bus such as I2C or SPI,
device nodes need to be added in order to allow the bus' children to be
instantiated at the proper location in the operating system's device tree (as
illustrated by the optional nodes in the example above).
+
+Tegra30:
+--------
+
+SoC DTSI:
+
+ pcie-controller@00003000 {
+ compatible = "nvidia,tegra30-pcie";
+ device_type = "pci";
+ reg = <0x00003000 0x00000800 /* PADS registers */
+ 0x00003800 0x00000200 /* AFI registers */
+ 0x10000000 0x10000000>; /* configuration space */
+ reg-names = "pads", "afi", "cs";
+ interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH /* controller interrupt */
+ GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>; /* MSI interrupt */
+ interrupt-names = "intr", "msi";
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &intc GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
+
+ bus-range = <0x00 0xff>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ ranges = <0x82000000 0 0x00000000 0x00000000 0 0x00001000 /* port 0 configuration space */
+ 0x82000000 0 0x00001000 0x00001000 0 0x00001000 /* port 1 configuration space */
+ 0x82000000 0 0x00004000 0x00004000 0 0x00001000 /* port 2 configuration space */
+ 0x81000000 0 0 0x02000000 0 0x00010000 /* downstream I/O */
+ 0x82000000 0 0x20000000 0x20000000 0 0x08000000 /* non-prefetchable memory */
+ 0xc2000000 0 0x28000000 0x28000000 0 0x18000000>; /* prefetchable memory */
+
+ clocks = <&tegra_car TEGRA30_CLK_PCIE>,
+ <&tegra_car TEGRA30_CLK_AFI>,
+ <&tegra_car TEGRA30_CLK_PLL_E>,
+ <&tegra_car TEGRA30_CLK_CML0>;
+ clock-names = "pex", "afi", "pll_e", "cml";
+ resets = <&tegra_car 70>,
+ <&tegra_car 72>,
+ <&tegra_car 74>;
+ reset-names = "pex", "afi", "pcie_x";
+ status = "disabled";
+
+ pci@1,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82000800 0 0x00000000 0 0x1000>;
+ reg = <0x000800 0 0 0 0>;
+ status = "disabled";
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges;
+
+ nvidia,num-lanes = <2>;
+ };
+
+ pci@2,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82001000 0 0x00001000 0 0x1000>;
+ reg = <0x001000 0 0 0 0>;
+ status = "disabled";
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges;
+
+ nvidia,num-lanes = <2>;
+ };
+
+ pci@3,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82001800 0 0x00004000 0 0x1000>;
+ reg = <0x001800 0 0 0 0>;
+ status = "disabled";
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges;
+
+ nvidia,num-lanes = <2>;
+ };
+ };
+
+Board DTS:
+
+ pcie-controller@00003000 {
+ status = "okay";
+
+ avdd-pexa-supply = <&ldo1_reg>;
+ vdd-pexa-supply = <&ldo1_reg>;
+ avdd-pexb-supply = <&ldo1_reg>;
+ vdd-pexb-supply = <&ldo1_reg>;
+ avdd-pex-pll-supply = <&ldo1_reg>;
+ avdd-plle-supply = <&ldo1_reg>;
+ vddio-pex-ctl-supply = <&sys_3v3_reg>;
+ hvdd-pex-supply = <&sys_3v3_pexs_reg>;
+
+ pci@1,0 {
+ status = "okay";
+ };
+
+ pci@3,0 {
+ status = "okay";
+ };
+ };
+
+Tegra124:
+---------
+
+SoC DTSI:
+
+ pcie-controller@01003000 {
+ compatible = "nvidia,tegra124-pcie";
+ device_type = "pci";
+ reg = <0x0 0x01003000 0x0 0x00000800 /* PADS registers */
+ 0x0 0x01003800 0x0 0x00000800 /* AFI registers */
+ 0x0 0x02000000 0x0 0x10000000>; /* configuration space */
+ reg-names = "pads", "afi", "cs";
+ interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>, /* controller interrupt */
+ <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>; /* MSI interrupt */
+ interrupt-names = "intr", "msi";
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &gic GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
+
+ bus-range = <0x00 0xff>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ ranges = <0x82000000 0 0x01000000 0x0 0x01000000 0 0x00001000 /* port 0 configuration space */
+ 0x82000000 0 0x01001000 0x0 0x01001000 0 0x00001000 /* port 1 configuration space */
+ 0x81000000 0 0x0 0x0 0x12000000 0 0x00010000 /* downstream I/O (64 KiB) */
+ 0x82000000 0 0x13000000 0x0 0x13000000 0 0x0d000000 /* non-prefetchable memory (208 MiB) */
+ 0xc2000000 0 0x20000000 0x0 0x20000000 0 0x20000000>; /* prefetchable memory (512 MiB) */
+
+ clocks = <&tegra_car TEGRA124_CLK_PCIE>,
+ <&tegra_car TEGRA124_CLK_AFI>,
+ <&tegra_car TEGRA124_CLK_PLL_E>,
+ <&tegra_car TEGRA124_CLK_CML0>;
+ clock-names = "pex", "afi", "pll_e", "cml";
+ resets = <&tegra_car 70>,
+ <&tegra_car 72>,
+ <&tegra_car 74>;
+ reset-names = "pex", "afi", "pcie_x";
+ status = "disabled";
+
+ pci@1,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82000800 0 0x01000000 0 0x1000>;
+ reg = <0x000800 0 0 0 0>;
+ status = "disabled";
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges;
+
+ nvidia,num-lanes = <2>;
+ };
+
+ pci@2,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82001000 0 0x01001000 0 0x1000>;
+ reg = <0x001000 0 0 0 0>;
+ status = "disabled";
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges;
+
+ nvidia,num-lanes = <1>;
+ };
+ };
+
+Board DTS:
+
+ pcie-controller@01003000 {
+ status = "okay";
+
+ avddio-pex-supply = <&vdd_1v05_run>;
+ dvddio-pex-supply = <&vdd_1v05_run>;
+ avdd-pex-pll-supply = <&vdd_1v05_run>;
+ hvdd-pex-supply = <&vdd_3v3_lp0>;
+ hvdd-pex-pll-e-supply = <&vdd_3v3_lp0>;
+ vddio-pex-ctl-supply = <&vdd_3v3_lp0>;
+ avdd-pll-erefe-supply = <&avdd_1v05_run>;
+
+ /* Mini PCIe */
+ pci@1,0 {
+ phys = <&{/padctl@7009f000/pads/pcie/lanes/pcie-4}>;
+ phy-names = "pcie-0";
+ status = "okay";
+ };
+
+ /* Gigabit Ethernet */
+ pci@2,0 {
+ phys = <&{/padctl@7009f000/pads/pcie/lanes/pcie-2}>;
+ phy-names = "pcie-0";
+ status = "okay";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pci/pci-armada8k.txt b/Documentation/devicetree/bindings/pci/pci-armada8k.txt
new file mode 100644
index 000000000000..598533a57d79
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/pci-armada8k.txt
@@ -0,0 +1,38 @@
+* Marvell Armada 7K/8K PCIe interface
+
+This PCIe host controller is based on the Synopsys DesignWare PCIe IP
+and thus inherits all the common properties defined in designware-pcie.txt.
+
+Required properties:
+- compatible: "marvell,armada8k-pcie"
+- reg: must contain two register regions
+ - the control register region
+ - the config space region
+- reg-names:
+ - "ctrl" for the control register region
+ - "config" for the config space region
+- interrupts: Interrupt specifier for the PCIe controller
+- clocks: reference to the PCIe controller clock
+
+Example:
+
+ pcie@f2600000 {
+ compatible = "marvell,armada8k-pcie", "snps,dw-pcie";
+ reg = <0 0xf2600000 0 0x10000>, <0 0xf6f00000 0 0x80000>;
+ reg-names = "ctrl", "config";
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ device_type = "pci";
+ dma-coherent;
+
+ bus-range = <0 0xff>;
+ ranges = <0x81000000 0 0xf9000000 0 0xf9000000 0 0x10000 /* downstream I/O */
+ 0x82000000 0 0xf6000000 0 0xf6000000 0 0xf00000>; /* non-prefetchable memory */
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &gic 0 GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
+ num-lanes = <1>;
+ clocks = <&cpm_syscon0 1 13>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/pci/pci-keystone.txt b/Documentation/devicetree/bindings/pci/pci-keystone.txt
index 54eae2938174..d08a4d51108f 100644
--- a/Documentation/devicetree/bindings/pci/pci-keystone.txt
+++ b/Documentation/devicetree/bindings/pci/pci-keystone.txt
@@ -56,6 +56,7 @@ Optional properties:-
phy-names: name of the Generic Keystine SerDes phy for PCI
- If boot loader already does PCI link establishment, then phys and
phy-names shouldn't be present.
+ interrupts: platform interrupt for error interrupts.
Designware DT Properties not applicable for Keystone PCI
diff --git a/Documentation/devicetree/bindings/phy/nvidia,tegra124-xusb-padctl.txt b/Documentation/devicetree/bindings/phy/nvidia,tegra124-xusb-padctl.txt
new file mode 100644
index 000000000000..0bf1ae243552
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/nvidia,tegra124-xusb-padctl.txt
@@ -0,0 +1,733 @@
+Device tree binding for NVIDIA Tegra XUSB pad controller
+========================================================
+
+The Tegra XUSB pad controller manages a set of I/O lanes (with differential
+signals) which connect directly to pins/pads on the SoC package. Each lane
+is controlled by a HW block referred to as a "pad" in the Tegra hardware
+documentation. Each such "pad" may control either one or multiple lanes,
+and thus contains any logic common to all its lanes. Each lane can be
+separately configured and powered up.
+
+Some of the lanes are high-speed lanes, which can be used for PCIe, SATA or
+super-speed USB. Other lanes are for various types of low-speed, full-speed
+or high-speed USB (such as UTMI, ULPI and HSIC). The XUSB pad controller
+contains a software-configurable mux that sits between the I/O controller
+ports (e.g. PCIe) and the lanes.
+
+In addition to per-lane configuration, USB 3.0 ports may require additional
+settings on a per-board basis.
+
+Pads will be represented as children of the top-level XUSB pad controller
+device tree node. Each lane exposed by the pad will be represented by its
+own subnode and can be referenced by users of the lane using the standard
+PHY bindings, as described by the phy-bindings.txt file in this directory.
+
+The Tegra hardware documentation refers to the connection between the XUSB
+pad controller and the XUSB controller as "ports". This is confusing since
+"port" is typically used to denote the physical USB receptacle. The device
+tree binding in this document uses the term "port" to refer to the logical
+abstraction of the signals that are routed to a USB receptacle (i.e. a PHY
+for the USB signal, the VBUS power supply, the USB 2.0 companion port for
+USB 3.0 receptacles, ...).
+
+Required properties:
+--------------------
+- compatible: Must be:
+ - Tegra124: "nvidia,tegra124-xusb-padctl"
+ - Tegra132: "nvidia,tegra132-xusb-padctl", "nvidia,tegra124-xusb-padctl"
+ - Tegra210: "nvidia,tegra210-xusb-padctl"
+- reg: Physical base address and length of the controller's registers.
+- resets: Must contain an entry for each entry in reset-names.
+- reset-names: Must include the following entries:
+ - "padctl"
+
+
+Pad nodes:
+==========
+
+A required child node named "pads" contains a list of subnodes, one for each
+of the pads exposed by the XUSB pad controller. Each pad may need additional
+resources that can be referenced in its pad node.
+
+The "status" property is used to enable or disable the use of a pad. If set
+to "disabled", the pad will not be used on the given board. In order to use
+the pad and any of its lanes, this property must be set to "okay".
+
+For Tegra124 and Tegra132, the following pads exist: usb2, ulpi, hsic, pcie
+and sata. No extra resources are required for operation of these pads.
+
+For Tegra210, the following pads exist: usb2, hsic, pcie and sata. Below is
+a description of the properties of each pad.
+
+UTMI pad:
+---------
+
+Required properties:
+- clocks: Must contain an entry for each entry in clock-names.
+- clock-names: Must contain the following entries:
+ - "trk": phandle and specifier referring to the USB2 tracking clock
+
+HSIC pad:
+---------
+
+Required properties:
+- clocks: Must contain an entry for each entry in clock-names.
+- clock-names: Must contain the following entries:
+ - "trk": phandle and specifier referring to the HSIC tracking clock
+
+PCIe pad:
+---------
+
+Required properties:
+- clocks: Must contain an entry for each entry in clock-names.
+- clock-names: Must contain the following entries:
+ - "pll": phandle and specifier referring to the PLLE
+- resets: Must contain an entry for each entry in reset-names.
+- reset-names: Must contain the following entries:
+ - "phy": reset for the PCIe UPHY block
+
+SATA pad:
+---------
+
+Required properties:
+- resets: Must contain an entry for each entry in reset-names.
+- reset-names: Must contain the following entries:
+ - "phy": reset for the SATA UPHY block
+
+
+PHY nodes:
+==========
+
+Each pad node has a child named "lanes" that contains one or more children of
+its own, each representing one of the lanes controlled by the pad.
+
+Required properties:
+--------------------
+- status: Defines the operation status of the PHY. Valid values are:
+ - "disabled": the PHY is disabled
+ - "okay": the PHY is enabled
+- #phy-cells: Should be 0. Since each lane represents a single PHY, there is
+ no need for an additional specifier.
+- nvidia,function: The output function of the PHY. See below for a list of
+ valid functions per SoC generation.
+
+For Tegra124 and Tegra132, the list of valid PHY nodes is given below:
+- usb2: usb2-0, usb2-1, usb2-2
+ - functions: "snps", "xusb", "uart"
+- ulpi: ulpi-0
+ - functions: "snps", "xusb"
+- hsic: hsic-0, hsic-1
+ - functions: "snps", "xusb"
+- pcie: pcie-0, pcie-1, pcie-2, pcie-3, pcie-4
+ - functions: "pcie", "usb3-ss"
+- sata: sata-0
+ - functions: "usb3-ss", "sata"
+
+For Tegra210, the list of valid PHY nodes is given below:
+- utmi: utmi-0, utmi-1, utmi-2, utmi-3
+ - functions: "snps", "xusb", "uart"
+- hsic: hsic-0, hsic-1
+ - functions: "snps", "xusb"
+- pcie: pcie-0, pcie-1, pcie-2, pcie-3, pcie-4, pcie-5, pcie-6
+ - functions: "pcie-x1", "usb3-ss", "pcie-x4"
+- sata: sata-0
+ - functions: "usb3-ss", "sata"
+
+
+Port nodes:
+===========
+
+A required child node named "ports" contains a list of all the ports exposed
+by the XUSB pad controller. Per-port configuration is only required for USB.
+
+USB2 ports:
+-----------
+
+Required properties:
+- status: Defines the operation status of the port. Valid values are:
+ - "disabled": the port is disabled
+ - "okay": the port is enabled
+- mode: A string that determines the mode in which to run the port. Valid
+ values are:
+ - "host": for USB host mode
+ - "device": for USB device mode
+ - "otg": for USB OTG mode
+
+Optional properties:
+- nvidia,internal: A boolean property whose presence determines that a port
+ is internal. In the absence of this property the port is considered to be
+ external.
+- vbus-supply: phandle to a regulator supplying the VBUS voltage.
+
+ULPI ports:
+-----------
+
+Optional properties:
+- status: Defines the operation status of the port. Valid values are:
+ - "disabled": the port is disabled
+ - "okay": the port is enabled
+- nvidia,internal: A boolean property whose presence determines that a port
+ is internal. In the absence of this property the port is considered to be
+ external.
+- vbus-supply: phandle to a regulator supplying the VBUS voltage.
+
+HSIC ports:
+-----------
+
+Required properties:
+- status: Defines the operation status of the port. Valid values are:
+ - "disabled": the port is disabled
+ - "okay": the port is enabled
+
+Optional properties:
+- vbus-supply: phandle to a regulator supplying the VBUS voltage.
+
+Super-speed USB ports:
+----------------------
+
+Required properties:
+- status: Defines the operation status of the port. Valid values are:
+ - "disabled": the port is disabled
+ - "okay": the port is enabled
+- nvidia,usb2-companion: A single cell that specifies the physical port number
+ to map this super-speed USB port to. The range of valid port numbers varies
+ with the SoC generation:
+ - 0-2: for Tegra124 and Tegra132
+ - 0-3: for Tegra210
+
+Optional properties:
+- nvidia,internal: A boolean property whose presence determines that a port
+ is internal. In the absence of this property the port is considered to be
+ external.
+
+For Tegra124 and Tegra132, the XUSB pad controller exposes the following
+ports:
+- 3x USB2: usb2-0, usb2-1, usb2-2
+- 1x ULPI: ulpi-0
+- 2x HSIC: hsic-0, hsic-1
+- 2x super-speed USB: usb3-0, usb3-1
+
+For Tegra210, the XUSB pad controller exposes the following ports:
+- 4x USB2: usb2-0, usb2-1, usb2-2, usb2-3
+- 2x HSIC: hsic-0, hsic-1
+- 4x super-speed USB: usb3-0, usb3-1, usb3-2, usb3-3
+
+
+Examples:
+=========
+
+Tegra124 and Tegra132:
+----------------------
+
+SoC include:
+
+ padctl@7009f000 {
+ /* for Tegra124 */
+ compatible = "nvidia,tegra124-xusb-padctl";
+ /* for Tegra132 */
+ compatible = "nvidia,tegra132-xusb-padctl",
+ "nvidia,tegra124-xusb-padctl";
+ reg = <0x0 0x7009f000 0x0 0x1000>;
+ resets = <&tegra_car 142>;
+ reset-names = "padctl";
+
+ pads {
+ usb2 {
+ status = "disabled";
+
+ lanes {
+ usb2-0 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ usb2-1 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ usb2-2 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+ };
+ };
+
+ ulpi {
+ status = "disabled";
+
+ lanes {
+ ulpi-0 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+ };
+ };
+
+ hsic {
+ status = "disabled";
+
+ lanes {
+ hsic-0 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ hsic-1 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+ };
+ };
+
+ pcie {
+ status = "disabled";
+
+ lanes {
+ pcie-0 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ pcie-1 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ pcie-2 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ pcie-3 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ pcie-4 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+ };
+ };
+
+ sata {
+ status = "disabled";
+
+ lanes {
+ sata-0 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+ };
+ };
+ };
+
+ ports {
+ usb2-0 {
+ status = "disabled";
+ };
+
+ usb2-1 {
+ status = "disabled";
+ };
+
+ usb2-2 {
+ status = "disabled";
+ };
+
+ ulpi-0 {
+ status = "disabled";
+ };
+
+ hsic-0 {
+ status = "disabled";
+ };
+
+ hsic-1 {
+ status = "disabled";
+ };
+
+ usb3-0 {
+ status = "disabled";
+ };
+
+ usb3-1 {
+ status = "disabled";
+ };
+ };
+ };
+
+Board file:
+
+ padctl@7009f000 {
+ status = "okay";
+
+ pads {
+ usb2 {
+ status = "okay";
+
+ lanes {
+ usb2-0 {
+ nvidia,function = "xusb";
+ status = "okay";
+ };
+
+ usb2-1 {
+ nvidia,function = "xusb";
+ status = "okay";
+ };
+
+ usb2-2 {
+ nvidia,function = "xusb";
+ status = "okay";
+ };
+ };
+ };
+
+ pcie {
+ status = "okay";
+
+ lanes {
+ pcie-0 {
+ nvidia,function = "usb3-ss";
+ status = "okay";
+ };
+
+ pcie-2 {
+ nvidia,function = "pcie";
+ status = "okay";
+ };
+
+ pcie-4 {
+ nvidia,function = "pcie";
+ status = "okay";
+ };
+ };
+ };
+
+ sata {
+ status = "okay";
+
+ lanes {
+ sata-0 {
+ nvidia,function = "sata";
+ status = "okay";
+ };
+ };
+ };
+ };
+
+ ports {
+ /* Micro A/B */
+ usb2-0 {
+ status = "okay";
+ mode = "otg";
+ };
+
+ /* Mini PCIe */
+ usb2-1 {
+ status = "okay";
+ mode = "host";
+ };
+
+ /* USB3 */
+ usb2-2 {
+ status = "okay";
+ mode = "host";
+
+ vbus-supply = <&vdd_usb3_vbus>;
+ };
+
+ usb3-0 {
+ nvidia,usb2-companion = <2>;
+ status = "okay";
+ };
+ };
+ };
+
+Tegra210:
+---------
+
+SoC include:
+
+ padctl@7009f000 {
+ compatible = "nvidia,tegra210-xusb-padctl";
+ reg = <0x0 0x7009f000 0x0 0x1000>;
+ resets = <&tegra_car 142>;
+ reset-names = "padctl";
+
+ status = "disabled";
+
+ pads {
+ usb2 {
+ clocks = <&tegra_car TEGRA210_CLK_USB2_TRK>;
+ clock-names = "trk";
+ status = "disabled";
+
+ lanes {
+ usb2-0 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ usb2-1 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ usb2-2 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ usb2-3 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+ };
+ };
+
+ hsic {
+ clocks = <&tegra_car TEGRA210_CLK_HSIC_TRK>;
+ clock-names = "trk";
+ status = "disabled";
+
+ lanes {
+ hsic-0 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ hsic-1 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+ };
+ };
+
+ pcie {
+ clocks = <&tegra_car TEGRA210_CLK_PLL_E>;
+ clock-names = "pll";
+ resets = <&tegra_car 205>;
+ reset-names = "phy";
+ status = "disabled";
+
+ lanes {
+ pcie-0 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ pcie-1 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ pcie-2 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ pcie-3 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ pcie-4 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ pcie-5 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+
+ pcie-6 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+ };
+ };
+
+ sata {
+ clocks = <&tegra_car TEGRA210_CLK_PLL_E>;
+ clock-names = "pll";
+ resets = <&tegra_car 204>;
+ reset-names = "phy";
+ status = "disabled";
+
+ lanes {
+ sata-0 {
+ status = "disabled";
+ #phy-cells = <0>;
+ };
+ };
+ };
+ };
+
+ ports {
+ usb2-0 {
+ status = "disabled";
+ };
+
+ usb2-1 {
+ status = "disabled";
+ };
+
+ usb2-2 {
+ status = "disabled";
+ };
+
+ usb2-3 {
+ status = "disabled";
+ };
+
+ hsic-0 {
+ status = "disabled";
+ };
+
+ hsic-1 {
+ status = "disabled";
+ };
+
+ usb3-0 {
+ status = "disabled";
+ };
+
+ usb3-1 {
+ status = "disabled";
+ };
+
+ usb3-2 {
+ status = "disabled";
+ };
+
+ usb3-3 {
+ status = "disabled";
+ };
+ };
+ };
+
+Board file:
+
+ padctl@7009f000 {
+ status = "okay";
+
+ pads {
+ usb2 {
+ status = "okay";
+
+ lanes {
+ usb2-0 {
+ nvidia,function = "xusb";
+ status = "okay";
+ };
+
+ usb2-1 {
+ nvidia,function = "xusb";
+ status = "okay";
+ };
+
+ usb2-2 {
+ nvidia,function = "xusb";
+ status = "okay";
+ };
+
+ usb2-3 {
+ nvidia,function = "xusb";
+ status = "okay";
+ };
+ };
+ };
+
+ pcie {
+ status = "okay";
+
+ lanes {
+ pcie-0 {
+ nvidia,function = "pcie-x1";
+ status = "okay";
+ };
+
+ pcie-1 {
+ nvidia,function = "pcie-x4";
+ status = "okay";
+ };
+
+ pcie-2 {
+ nvidia,function = "pcie-x4";
+ status = "okay";
+ };
+
+ pcie-3 {
+ nvidia,function = "pcie-x4";
+ status = "okay";
+ };
+
+ pcie-4 {
+ nvidia,function = "pcie-x4";
+ status = "okay";
+ };
+
+ pcie-5 {
+ nvidia,function = "usb3-ss";
+ status = "okay";
+ };
+
+ pcie-6 {
+ nvidia,function = "usb3-ss";
+ status = "okay";
+ };
+ };
+ };
+
+ sata {
+ status = "okay";
+
+ lanes {
+ sata-0 {
+ nvidia,function = "sata";
+ status = "okay";
+ };
+ };
+ };
+ };
+
+ ports {
+ usb2-0 {
+ status = "okay";
+ mode = "otg";
+ };
+
+ usb2-1 {
+ status = "okay";
+ vbus-supply = <&vdd_5v0_rtl>;
+ mode = "host";
+ };
+
+ usb2-2 {
+ status = "okay";
+ vbus-supply = <&vdd_usb_vbus>;
+ mode = "host";
+ };
+
+ usb2-3 {
+ status = "okay";
+ mode = "host";
+ };
+
+ usb3-0 {
+ status = "okay";
+ nvidia,lanes = "pcie-6";
+ nvidia,usb2-companion = <1>;
+ };
+
+ usb3-1 {
+ status = "okay";
+ nvidia,lanes = "pcie-5";
+ nvidia,usb2-companion = <2>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/phy/phy-lpc18xx-usb-otg.txt b/Documentation/devicetree/bindings/phy/phy-lpc18xx-usb-otg.txt
index bd61b467e30a..3bb821cd6a7f 100644
--- a/Documentation/devicetree/bindings/phy/phy-lpc18xx-usb-otg.txt
+++ b/Documentation/devicetree/bindings/phy/phy-lpc18xx-usb-otg.txt
@@ -18,7 +18,7 @@ creg: syscon@40043000 {
compatible = "nxp,lpc1850-creg", "syscon", "simple-mfd";
reg = <0x40043000 0x1000>;
- usb0_otg_phy: phy@004 {
+ usb0_otg_phy: phy {
compatible = "nxp,lpc1850-usb-otg-phy";
clocks = <&ccu1 CLK_USB0>;
#phy-cells = <0>;
diff --git a/Documentation/devicetree/bindings/phy/rockchip-dp-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-dp-phy.txt
index 50c4f9b00adf..e3b4809fbe82 100644
--- a/Documentation/devicetree/bindings/phy/rockchip-dp-phy.txt
+++ b/Documentation/devicetree/bindings/phy/rockchip-dp-phy.txt
@@ -8,15 +8,19 @@ Required properties:
of memory mapped region.
- clock-names: from common clock binding:
Required elements: "24m"
-- rockchip,grf: phandle to the syscon managing the "general register files"
- #phy-cells : from the generic PHY bindings, must be 0;
Example:
-edp_phy: edp-phy {
- compatible = "rockchip,rk3288-dp-phy";
- rockchip,grf = <&grf>;
- clocks = <&cru SCLK_EDP_24M>;
- clock-names = "24m";
- #phy-cells = <0>;
+grf: syscon@ff770000 {
+ compatible = "rockchip,rk3288-grf", "syscon", "simple-mfd";
+
+...
+
+ edp_phy: edp-phy {
+ compatible = "rockchip,rk3288-dp-phy";
+ clocks = <&cru SCLK_EDP_24M>;
+ clock-names = "24m";
+ #phy-cells = <0>;
+ };
};
diff --git a/Documentation/devicetree/bindings/phy/rockchip-emmc-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-emmc-phy.txt
index 61916f15a949..555cb0f40690 100644
--- a/Documentation/devicetree/bindings/phy/rockchip-emmc-phy.txt
+++ b/Documentation/devicetree/bindings/phy/rockchip-emmc-phy.txt
@@ -3,17 +3,23 @@ Rockchip EMMC PHY
Required properties:
- compatible: rockchip,rk3399-emmc-phy
- - rockchip,grf : phandle to the syscon managing the "general
- register files"
- #phy-cells: must be 0
- - reg: PHY configure reg address offset in "general
+ - reg: PHY register address offset and length in "general
register files"
Example:
-emmcphy: phy {
- compatible = "rockchip,rk3399-emmc-phy";
- rockchip,grf = <&grf>;
- reg = <0xf780>;
- #phy-cells = <0>;
+
+grf: syscon@ff770000 {
+ compatible = "rockchip,rk3399-grf", "syscon", "simple-mfd";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+...
+
+ emmcphy: phy@f780 {
+ compatible = "rockchip,rk3399-emmc-phy";
+ reg = <0xf780 0x20>;
+ #phy-cells = <0>;
+ };
};
diff --git a/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt
index 08a4a32c8eb0..0326154c7925 100644
--- a/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/img,pistachio-pinctrl.txt
@@ -134,12 +134,12 @@ mfio80 ddr_debug, mips_trace_data, mips_debug
mfio81 dreq0, mips_trace_data, eth_debug
mfio82 dreq1, mips_trace_data, eth_debug
mfio83 mips_pll_lock, mips_trace_data, usb_debug
-mfio84 sys_pll_lock, mips_trace_data, usb_debug
-mfio85 wifi_pll_lock, mips_trace_data, sdhost_debug
-mfio86 bt_pll_lock, mips_trace_data, sdhost_debug
-mfio87 rpu_v_pll_lock, dreq2, socif_debug
-mfio88 rpu_l_pll_lock, dreq3, socif_debug
-mfio89 audio_pll_lock, dreq4, dreq5
+mfio84 audio_pll_lock, mips_trace_data, usb_debug
+mfio85 rpu_v_pll_lock, mips_trace_data, sdhost_debug
+mfio86 rpu_l_pll_lock, mips_trace_data, sdhost_debug
+mfio87 sys_pll_lock, dreq2, socif_debug
+mfio88 wifi_pll_lock, dreq3, socif_debug
+mfio89 bt_pll_lock, dreq4, dreq5
tck
trstn
tdi
diff --git a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
index 3f6a524cc5ff..32f4a2d6d0b3 100644
--- a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
@@ -1,13 +1,16 @@
== Amlogic Meson pinmux controller ==
Required properties for the root node:
- - compatible: "amlogic,meson8-pinctrl" or "amlogic,meson8b-pinctrl"
+ - compatible: one of "amlogic,meson8-cbus-pinctrl"
+ "amlogic,meson8b-cbus-pinctrl"
+ "amlogic,meson8-aobus-pinctrl"
+ "amlogic,meson8b-aobus-pinctrl"
- reg: address and size of registers controlling irq functionality
=== GPIO sub-nodes ===
-The 2 power domains of the controller (regular and always-on) are
-represented as sub-nodes and each of them acts as a GPIO controller.
+The GPIO bank for the controller is represented as a sub-node and it acts as a
+GPIO controller.
Required properties for sub-nodes are:
- reg: should contain address and size for mux, pull-enable, pull and
@@ -18,10 +21,6 @@ Required properties for sub-nodes are:
- gpio-controller: identifies the node as a gpio controller
- #gpio-cells: must be 2
-Valid sub-node names are:
- - "banks" for the regular domain
- - "ao-bank" for the always-on domain
-
=== Other sub-nodes ===
Child nodes without the "gpio-controller" represent some desired
@@ -45,7 +44,7 @@ pinctrl-bindings.txt
=== Example ===
pinctrl: pinctrl@c1109880 {
- compatible = "amlogic,meson8-pinctrl";
+ compatible = "amlogic,meson8-cbus-pinctrl";
reg = <0xc1109880 0x10>;
#address-cells = <1>;
#size-cells = <1>;
@@ -61,15 +60,6 @@ pinctrl-bindings.txt
#gpio-cells = <2>;
};
- gpio_ao: ao-bank@c1108030 {
- reg = <0xc8100014 0x4>,
- <0xc810002c 0x4>,
- <0xc8100024 0x8>;
- reg-names = "mux", "pull", "gpio";
- gpio-controller;
- #gpio-cells = <2>;
- };
-
nand {
mux {
groups = "nand_io", "nand_io_ce0", "nand_io_ce1",
@@ -79,18 +69,4 @@ pinctrl-bindings.txt
function = "nand";
};
};
-
- uart_ao_a {
- mux {
- groups = "uart_tx_ao_a", "uart_rx_ao_a",
- "uart_cts_ao_a", "uart_rts_ao_a";
- function = "uart_ao";
- };
-
- conf {
- pins = "GPIOAO_0", "GPIOAO_1",
- "GPIOAO_2", "GPIOAO_3";
- bias-disable;
- };
- };
};
diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt
index 30676ded85bb..8a6223dbc143 100644
--- a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt
@@ -1,6 +1,12 @@
Device tree binding for NVIDIA Tegra XUSB pad controller
========================================================
+NOTE: It turns out that this binding isn't an accurate description of the XUSB
+pad controller. While the description is good enough for the functional subset
+required for PCIe and SATA, it lacks the flexibility to represent the features
+needed for USB. For the new binding, see ../phy/nvidia,tegra124-xusb-padctl.txt.
+The binding described in this file is deprecated and should not be used.
+
The Tegra XUSB pad controller manages a set of lanes, each of which can be
assigned to one out of a set of different pads. Some of these pads have an
associated PHY that must be powered up before the pad can be used.
diff --git a/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt
index ffadb7a371f6..74e6ec0339d6 100644
--- a/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt
@@ -72,8 +72,8 @@ Pin Configuration Node Properties:
The pin configuration parameters use the generic pinconf bindings defined in
pinctrl-bindings.txt in this directory. The supported parameters are
-bias-disable, bias-pull-up, bias-pull-down and power-source. For pins that
-have a configurable I/O voltage, the power-source value should be the
+bias-disable, bias-pull-up, bias-pull-down, drive-strength and power-source. For
+pins that have a configurable I/O voltage, the power-source value should be the
nominal I/O voltage in millivolts.
diff --git a/Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt b/Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt
new file mode 100644
index 000000000000..b74e4d4785ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/renesas,rcar-sysc.txt
@@ -0,0 +1,48 @@
+DT bindings for the Renesas R-Car System Controller
+
+== System Controller Node ==
+
+The R-Car System Controller provides power management for the CPU cores and
+various coprocessors.
+
+Required properties:
+ - compatible: Must contain exactly one of the following:
+ - "renesas,r8a7779-sysc" (R-Car H1)
+ - "renesas,r8a7790-sysc" (R-Car H2)
+ - "renesas,r8a7791-sysc" (R-Car M2-W)
+ - "renesas,r8a7792-sysc" (R-Car V2H)
+ - "renesas,r8a7793-sysc" (R-Car M2-N)
+ - "renesas,r8a7794-sysc" (R-Car E2)
+ - "renesas,r8a7795-sysc" (R-Car H3)
+ - reg: Address start and address range for the device.
+ - #power-domain-cells: Must be 1.
+
+
+Example:
+
+ sysc: system-controller@e6180000 {
+ compatible = "renesas,r8a7791-sysc";
+ reg = <0 0xe6180000 0 0x0200>;
+ #power-domain-cells = <1>;
+ };
+
+
+== PM Domain Consumers ==
+
+Devices residing in a power area must refer to that power area, as documented
+by the generic PM domain bindings in
+Documentation/devicetree/bindings/power/power_domain.txt.
+
+Required properties:
+ - power-domains: A phandle and symbolic PM domain specifier, as defined in
+ <dt-bindings/power/r8a77*-sysc.h>.
+
+
+Example:
+
+ L2_CA15: cache-controller@0 {
+ compatible = "cache";
+ power-domains = <&sysc R8A7791_PD_CA15_SCU>;
+ cache-unified;
+ cache-level = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-poweroff.txt b/Documentation/devicetree/bindings/power/reset/gpio-poweroff.txt
index d4eab9227ea4..d4eab9227ea4 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-poweroff.txt
+++ b/Documentation/devicetree/bindings/power/reset/gpio-poweroff.txt
diff --git a/Documentation/devicetree/bindings/gpio/gpio-restart.txt b/Documentation/devicetree/bindings/power/reset/gpio-restart.txt
index af3701bc15c4..af3701bc15c4 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-restart.txt
+++ b/Documentation/devicetree/bindings/power/reset/gpio-restart.txt
diff --git a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
index c84fb47265eb..d23dc002a87e 100644
--- a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
+++ b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
@@ -37,8 +37,10 @@ Required properties:
- "rockchip,rk3368-pmu-io-voltage-domain" for rk3368 pmu-domains
- "rockchip,rk3399-io-voltage-domain" for rk3399
- "rockchip,rk3399-pmu-io-voltage-domain" for rk3399 pmu-domains
-- rockchip,grf: phandle to the syscon managing the "general register files"
+Deprecated properties:
+- rockchip,grf: phandle to the syscon managing the "general register files"
+ Systems should move the io-domains to a sub-node of the grf simple-mfd.
You specify supplies using the standard regulator bindings by including
a phandle the relevant regulator. All specified supplies must be able
diff --git a/Documentation/devicetree/bindings/regmap/regmap.txt b/Documentation/devicetree/bindings/regmap/regmap.txt
index e98a9652ccc8..0127be360fe8 100644
--- a/Documentation/devicetree/bindings/regmap/regmap.txt
+++ b/Documentation/devicetree/bindings/regmap/regmap.txt
@@ -1,50 +1,29 @@
-Device-Tree binding for regmap
-
-The endianness mode of CPU & Device scenarios:
-Index Device Endianness properties
----------------------------------------------------
-1 BE 'big-endian'
-2 LE 'little-endian'
-3 Native 'native-endian'
-
-For one device driver, which will run in different scenarios above
-on different SoCs using the devicetree, we need one way to simplify
-this.
+Devicetree binding for regmap
Optional properties:
-- {big,little,native}-endian: these are boolean properties, if absent
- then the implementation will choose a default based on the device
- being controlled. These properties are for register values and all
- the buffers only. Native endian means that the CPU and device have
- the same endianness.
-Examples:
-Scenario 1 : CPU in LE mode & device in LE mode.
-dev: dev@40031000 {
- compatible = "name";
- reg = <0x40031000 0x1000>;
- ...
-};
+ little-endian,
+ big-endian,
+ native-endian: See common-properties.txt for a definition
-Scenario 2 : CPU in LE mode & device in BE mode.
-dev: dev@40031000 {
- compatible = "name";
- reg = <0x40031000 0x1000>;
- ...
- big-endian;
-};
+Note:
+Regmap defaults to little-endian register access on MMIO based
+devices, this is by far the most common setting. On CPU
+architectures that typically run big-endian operating systems
+(e.g. PowerPC), registers can be defined as big-endian and must
+be marked that way in the devicetree.
-Scenario 3 : CPU in BE mode & device in BE mode.
-dev: dev@40031000 {
- compatible = "name";
- reg = <0x40031000 0x1000>;
- ...
-};
+On SoCs that can be operated in both big-endian and little-endian
+modes, with a single hardware switch controlling both the endianness
+of the CPU and a byteswap for MMIO registers (e.g. many Broadcom MIPS
+chips), "native-endian" is used to allow using the same device tree
+blob in both cases.
-Scenario 4 : CPU in BE mode & device in LE mode.
+Examples:
+Scenario 1 : a register set in big-endian mode.
dev: dev@40031000 {
- compatible = "name";
+ compatible = "syscon";
reg = <0x40031000 0x1000>;
+ big-endian;
...
- little-endian;
};
diff --git a/Documentation/devicetree/bindings/regulator/max8973-regulator.txt b/Documentation/devicetree/bindings/regulator/max8973-regulator.txt
index f80ea2fe27e6..c2c68fcc1b41 100644
--- a/Documentation/devicetree/bindings/regulator/max8973-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/max8973-regulator.txt
@@ -32,6 +32,13 @@ Optional properties:
Enhanced transient response (ETR) will affect the configuration of CKADV.
+-junction-warn-millicelsius: u32, junction warning temperature threshold
+ in millicelsius. If die temperature crosses this level then
+ device generates the warning interrupts.
+
+Please note that thermal functionality is only supported on MAX77621. The
+supported threshold warning temperatures for MAX77621 are 120 degC and 140 degC.
+
Example:
max8973@1b {
diff --git a/Documentation/devicetree/bindings/regulator/pv88080.txt b/Documentation/devicetree/bindings/regulator/pv88080.txt
new file mode 100644
index 000000000000..38a614210dcb
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/pv88080.txt
@@ -0,0 +1,49 @@
+* Powerventure Semiconductor PV88080 Voltage Regulator
+
+Required properties:
+- compatible: "pvs,pv88080".
+- reg: I2C slave address, usually 0x49.
+- interrupts: the interrupt outputs of the controller
+- regulators: A node that houses a sub-node for each regulator within the
+ device. Each sub-node is identified using the node's name, with valid
+ values listed below. The content of each sub-node is defined by the
+ standard binding for regulators; see regulator.txt.
+ BUCK1, BUCK2, and BUCK3.
+
+Optional properties:
+- Any optional property defined in regulator.txt
+
+Example
+
+ pmic: pv88080@49 {
+ compatible = "pvs,pv88080";
+ reg = <0x49>;
+ interrupt-parent = <&gpio>;
+ interrupts = <24 24>;
+
+ regulators {
+ BUCK1 {
+ regulator-name = "buck1";
+ regulator-min-microvolt = < 600000>;
+ regulator-max-microvolt = <1393750>;
+ regulator-min-microamp = < 220000>;
+ regulator-max-microamp = <7040000>;
+ };
+
+ BUCK2 {
+ regulator-name = "buck2";
+ regulator-min-microvolt = < 600000>;
+ regulator-max-microvolt = <1393750>;
+ regulator-min-microamp = <1496000>;
+ regulator-max-microamp = <4189000>;
+ };
+
+ BUCK3 {
+ regulator-name = "buck3";
+ regulator-min-microvolt = <1400000>;
+ regulator-max-microvolt = <2193750>;
+ regulator-min-microamp = <1496000>;
+ regulator-max-microamp = <4189000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
index d00bfd8624a5..46c6f3ed1a1c 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
@@ -7,6 +7,7 @@ Qualcomm SPMI Regulators
"qcom,pm8841-regulators"
"qcom,pm8916-regulators"
"qcom,pm8941-regulators"
+ "qcom,pm8994-regulators"
- interrupts:
Usage: optional
@@ -68,6 +69,37 @@ Qualcomm SPMI Regulators
Definition: Reference to regulator supplying the input pin, as
described in the data sheet.
+- vdd_s1-supply:
+- vdd_s2-supply:
+- vdd_s3-supply:
+- vdd_s4-supply:
+- vdd_s5-supply:
+- vdd_s6-supply:
+- vdd_s7-supply:
+- vdd_s8-supply:
+- vdd_s9-supply:
+- vdd_s10-supply:
+- vdd_s11-supply:
+- vdd_s12-supply:
+- vdd_l1-supply:
+- vdd_l2_l26_l28-supply:
+- vdd_l3_l11-supply:
+- vdd_l4_l27_l31-supply:
+- vdd_l5_l7-supply:
+- vdd_l6_l12_l32-supply:
+- vdd_l8_l16_l30-supply:
+- vdd_l9_l10_l18_l22-supply:
+- vdd_l13_l19_l23_l24-supply:
+- vdd_l14_l15-supply:
+- vdd_l17_l29-supply:
+- vdd_l20_l21-supply:
+- vdd_l25-supply:
+- vdd_lvs_1_2-supply:
+ Usage: optional (pm8994 only)
+ Value type: <phandle>
+ Definition: Reference to regulator supplying the input pin, as
+ described in the data sheet.
+
The regulator node houses sub-nodes for each regulator within the device. Each
sub-node is identified using the node's name, with valid values listed for each
@@ -85,6 +117,11 @@ pm8941:
l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, lvs1, lvs2, lvs3,
mvs1, mvs2
+pm8994:
+ s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, l1, l2, l3, l4, l5,
+ l6, l7, l8, l9, l10, l11, l12, l13, l14, l15, l16, l17, l18, l19, l20,
+ l21, l22, l23, l24, l25, l26, l27, l28, l29, l30, l31, l32, lvs1, lvs2
+
The content of each sub-node is defined by the standard binding for regulators -
see regulator.txt - with additional custom properties described below:
diff --git a/Documentation/devicetree/bindings/regulator/regulator-max77620.txt b/Documentation/devicetree/bindings/regulator/regulator-max77620.txt
index b3c8ca672024..1c4bfe786736 100644
--- a/Documentation/devicetree/bindings/regulator/regulator-max77620.txt
+++ b/Documentation/devicetree/bindings/regulator/regulator-max77620.txt
@@ -94,6 +94,28 @@ Following are additional properties:
This is applicable if suspend state
FPS source is selected as FPS0, FPS1 or
FPS2.
+- maxim,ramp-rate-setting: integer, ramp rate(uV/us) setting to be
+ configured to the device.
+ The platform may have different ramp
+ rate than advertised ramp rate if it has
+ design variation from Maxim's
+ recommended. In this case, platform
+ specific ramp rate is used for ramp time
+ calculation and this property is used
+ for device register configurations.
+ The measured ramp rate of platform is
+ provided by the regulator-ramp-delay
+ as described in <devicetree/bindings/
+ regulator/regulator.txt>.
+ Maxim Max77620 supports following ramp
+ delay:
+ SD: 13.75mV/us, 27.5mV/us, 55mV/us
+ LDOs: 5mV/us, 100mV/us
+
+Note: If the measured ramp delay is same as advertised ramp delay then it is not
+required to provide the ramp delay with property "maxim,ramp-rate-setting". The
+ramp rate can be provided by the regulator-ramp-delay which will be used for
+ramp time calculation for voltage change as well as for device configuration.
Example:
--------
diff --git a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
index c58db75f959e..c3f6546ebac7 100644
--- a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
@@ -14,8 +14,8 @@ Required Properties:
- "setup-address" - contains setup register address of ABB module (ti,abb-v3)
- "int-address" - contains address of interrupt register for ABB module
(also see Optional properties)
-- #address-cell: should be 0
-- #size-cell: should be 0
+- #address-cells: should be 0
+- #size-cells: should be 0
- clocks: should point to the clock node used by ABB module
- ti,settling-time: Settling time in uSecs from SoC documentation for ABB module
to settle down(target time for SR2_WTCNT_VALUE).
@@ -69,7 +69,7 @@ Example #1: Simplest configuration (no efuse data, hard coded ABB table):
abb_x: regulator-abb-x {
compatible = "ti,abb-v1";
regulator-name = "abb_x";
- #address-cell = <0>;
+ #address-cells = <0>;
#size-cells = <0>;
reg = <0x483072f0 0x8>, <0x48306818 0x4>;
reg-names = "base-address", "int-address";
@@ -89,7 +89,7 @@ Example #2: Efuse bits contain ABB mode setting (no LDO override capability)
abb_y: regulator-abb-y {
compatible = "ti,abb-v2";
regulator-name = "abb_y";
- #address-cell = <0>;
+ #address-cells = <0>;
#size-cells = <0>;
reg = <0x4a307bd0 0x8>, <0x4a306014 0x4>, <0x4A002268 0x8>;
reg-names = "base-address", "int-address", "efuse-address";
@@ -110,7 +110,7 @@ Example #3: Efuse bits contain ABB mode setting and LDO override capability
abb_z: regulator-abb-z {
compatible = "ti,abb-v2";
regulator-name = "abb_z";
- #address-cell = <0>;
+ #address-cells = <0>;
#size-cells = <0>;
reg = <0x4ae07ce4 0x8>, <0x4ae06010 0x4>,
<0x4a002194 0x8>, <0x4ae0C314 0x4>;
diff --git a/Documentation/devicetree/bindings/regulator/twl-regulator.txt b/Documentation/devicetree/bindings/regulator/twl-regulator.txt
index 75b0c1669504..74a91c4f8530 100644
--- a/Documentation/devicetree/bindings/regulator/twl-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/twl-regulator.txt
@@ -57,6 +57,12 @@ For twl4030 regulators/LDOs
Optional properties:
- Any optional property defined in bindings/regulator/regulator.txt
+For twl4030 regulators/LDOs:
+ - regulator-initial-mode:
+ - 0x08 - Sleep mode, the nominal output voltage is maintained with low power
+ consumption with low load current capability.
+ - 0x0e - Active mode, the regulator can deliver its nominal output voltage
+ with full-load current capability.
Example:
diff --git a/Documentation/devicetree/bindings/reset/oxnas,reset.txt b/Documentation/devicetree/bindings/reset/oxnas,reset.txt
new file mode 100644
index 000000000000..6f06db930030
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/oxnas,reset.txt
@@ -0,0 +1,58 @@
+Oxford Semiconductor OXNAS SoC Family RESET Controller
+================================================
+
+Please also refer to reset.txt in this directory for common reset
+controller binding usage.
+
+Required properties:
+- compatible: Should be "oxsemi,ox810se-reset"
+- #reset-cells: 1, see below
+
+Parent node should have the following properties :
+- compatible: Should be "oxsemi,ox810se-sys-ctrl", "syscon", "simple-mfd"
+
+For OX810SE, the indices are :
+ - 0 : ARM
+ - 1 : COPRO
+ - 2 : Reserved
+ - 3 : Reserved
+ - 4 : USBHS
+ - 5 : USBHSPHY
+ - 6 : MAC
+ - 7 : PCI
+ - 8 : DMA
+ - 9 : DPE
+ - 10 : DDR
+ - 11 : SATA
+ - 12 : SATA_LINK
+ - 13 : SATA_PHY
+ - 14 : Reserved
+ - 15 : NAND
+ - 16 : GPIO
+ - 17 : UART1
+ - 18 : UART2
+ - 19 : MISC
+ - 20 : I2S
+ - 21 : AHB_MON
+ - 22 : UART3
+ - 23 : UART4
+ - 24 : SGDMA
+ - 25 : Reserved
+ - 26 : Reserved
+ - 27 : Reserved
+ - 28 : Reserved
+ - 29 : Reserved
+ - 30 : Reserved
+ - 31 : BUS
+
+example:
+
+sys: sys-ctrl@000000 {
+ compatible = "oxsemi,ox810se-sys-ctrl", "syscon", "simple-mfd";
+ reg = <0x000000 0x100000>;
+
+ reset: reset-controller {
+ compatible = "oxsemi,ox810se-reset";
+ #reset-cells = <1>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/rng/hisi-rng.txt b/Documentation/devicetree/bindings/rng/hisi-rng.txt
new file mode 100644
index 000000000000..d04d55a6c2f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/hisi-rng.txt
@@ -0,0 +1,12 @@
+Hisilicon Random Number Generator
+
+Required properties:
+- compatible : Should be "hisilicon,hip04-rng" or "hisilicon,hip05-rng"
+- reg : Offset and length of the register set of this block
+
+Example:
+
+rng@d1010000 {
+ compatible = "hisilicon,hip05-rng";
+ reg = <0xd1010000 0x100>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/s3c-rtc.txt b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt
index 1068ffce9f91..fdde63a5419c 100644
--- a/Documentation/devicetree/bindings/rtc/s3c-rtc.txt
+++ b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt
@@ -15,9 +15,10 @@ Required properties:
is the rtc tick interrupt. The number of cells representing a interrupt
depends on the parent interrupt controller.
- clocks: Must contain a list of phandle and clock specifier for the rtc
- and source clocks.
-- clock-names: Must contain "rtc" and "rtc_src" entries sorted in the
- same order as the clocks property.
+ clock and in the case of a s3c6410 compatible controller, also
+ a source clock.
+- clock-names: Must contain "rtc" and for a s3c6410 compatible controller,
+ a "rtc_src" sorted in the same order as the clocks property.
Example:
diff --git a/Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt b/Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt
new file mode 100644
index 000000000000..65b38bf60ae0
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/microchip,pic32-uart.txt
@@ -0,0 +1,29 @@
+* Microchip Universal Asynchronous Receiver Transmitter (UART)
+
+Required properties:
+- compatible: Should be "microchip,pic32mzda-uart"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt
+- clocks: Phandle to the clock.
+ See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+- pinctrl-names: A pinctrl state named "default" must be defined.
+- pinctrl-0: Phandle referencing pin configuration of the UART peripheral.
+ See: Documentation/devicetree/bindings/pinctrl/pinctrl-binding.txt
+
+Optional properties:
+- cts-gpios: CTS pin for UART
+
+Example:
+ uart1: serial@1f822000 {
+ compatible = "microchip,pic32mzda-uart";
+ reg = <0x1f822000 0x50>;
+ interrupts = <112 IRQ_TYPE_LEVEL_HIGH>,
+ <113 IRQ_TYPE_LEVEL_HIGH>,
+ <114 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&PBCLK2>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart1
+ &pinctrl_uart1_cts
+ &pinctrl_uart1_rts>;
+ cts-gpios = <&gpio1 15 0>;
+ };
diff --git a/Documentation/devicetree/bindings/soc/mediatek/auxadc.txt b/Documentation/devicetree/bindings/soc/mediatek/auxadc.txt
new file mode 100644
index 000000000000..bdb782918a72
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mediatek/auxadc.txt
@@ -0,0 +1,21 @@
+MediaTek AUXADC
+===============
+
+The Auxiliary Analog/Digital Converter (AUXADC) is an ADC found
+in some Mediatek SoCs which among other things measures the temperatures
+in the SoC. It can be used directly with register accesses, but it is also
+used by thermal controller which reads the temperatures from the AUXADC
+directly via its own bus interface. See
+Documentation/devicetree/bindings/thermal/mediatek-thermal.txt
+for the Thermal Controller which holds a phandle to the AUXADC.
+
+Required properties:
+- compatible: Must be "mediatek,mt8173-auxadc"
+- reg: Address range of the AUXADC unit
+
+Example:
+
+auxadc: auxadc@11001000 {
+ compatible = "mediatek,mt8173-auxadc";
+ reg = <0 0x11001000 0 0x1000>;
+};
diff --git a/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt b/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt
index ddeb5b6a53c1..107700d00df4 100644
--- a/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt
+++ b/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt
@@ -18,6 +18,7 @@ IP Pairing
Required properties in pwrap device node.
- compatible:
+ "mediatek,mt2701-pwrap" for MT2701/7623 SoCs
"mediatek,mt8135-pwrap" for MT8135 SoCs
"mediatek,mt8173-pwrap" for MT8173 SoCs
- interrupts: IRQ for pwrap in SOC
diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.txt b/Documentation/devicetree/bindings/soc/rockchip/grf.txt
new file mode 100644
index 000000000000..013e71a2cdc7
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/rockchip/grf.txt
@@ -0,0 +1,35 @@
+* Rockchip General Register Files (GRF)
+
+The general register file is used by software for static configuration; it
+is composed of many registers for system control.
+
+From RK3368 SoCs, the GRF is divided into two sections,
+- GRF, used for general non-secure system,
+- PMUGRF, used for always on system
+
+Required Properties:
+
+- compatible: GRF should be one of the following
+ - "rockchip,rk3066-grf", "syscon": for rk3066
+ - "rockchip,rk3188-grf", "syscon": for rk3188
+ - "rockchip,rk3228-grf", "syscon": for rk3228
+ - "rockchip,rk3288-grf", "syscon": for rk3288
+ - "rockchip,rk3368-grf", "syscon": for rk3368
+ - "rockchip,rk3399-grf", "syscon": for rk3399
+- compatible: PMUGRF should be one of the following
+ - "rockchip,rk3368-pmugrf", "syscon": for rk3368
+ - "rockchip,rk3399-pmugrf", "syscon": for rk3399
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+Example: GRF and PMUGRF of RK3399 SoCs
+
+ pmugrf: syscon@ff320000 {
+ compatible = "rockchip,rk3399-pmugrf", "syscon";
+ reg = <0x0 0xff320000 0x0 0x1000>;
+ };
+
+ grf: syscon@ff770000 {
+ compatible = "rockchip,rk3399-grf", "syscon";
+ reg = <0x0 0xff770000 0x0 0x10000>;
+ };
diff --git a/Documentation/devicetree/bindings/soc/rockchip/power_domain.txt b/Documentation/devicetree/bindings/soc/rockchip/power_domain.txt
index 13dc6a3fdb4a..f909ce06afc4 100644
--- a/Documentation/devicetree/bindings/soc/rockchip/power_domain.txt
+++ b/Documentation/devicetree/bindings/soc/rockchip/power_domain.txt
@@ -7,6 +7,7 @@ Required properties for power domain controller:
- compatible: Should be one of the following.
"rockchip,rk3288-power-controller" - for RK3288 SoCs.
"rockchip,rk3368-power-controller" - for RK3368 SoCs.
+ "rockchip,rk3399-power-controller" - for RK3399 SoCs.
- #power-domain-cells: Number of cells in a power-domain specifier.
Should be 1 for multiple PM domains.
- #address-cells: Should be 1.
@@ -16,8 +17,18 @@ Required properties for power domain sub nodes:
- reg: index of the power domain, should use macros in:
"include/dt-bindings/power/rk3288-power.h" - for RK3288 type power domain.
"include/dt-bindings/power/rk3368-power.h" - for RK3368 type power domain.
+ "include/dt-bindings/power/rk3399-power.h" - for RK3399 type power domain.
- clocks (optional): phandles to clocks which need to be enabled while power domain
switches state.
+- pm_qos (optional): phandles to qos blocks which need to be saved and restored
+ while power domain switches state.
+
+Qos Example:
+
+ qos_gpu: qos_gpu@ffaf0000 {
+ compatible = "syscon";
+ reg = <0x0 0xffaf0000 0x0 0x20>;
+ };
Example:
@@ -30,6 +41,7 @@ Example:
pd_gpu {
reg = <RK3288_PD_GPU>;
clocks = <&cru ACLK_GPU>;
+ pm_qos = <&qos_gpu>;
};
};
@@ -45,12 +57,41 @@ Example:
};
};
+Example 2:
+ power: power-controller {
+ compatible = "rockchip,rk3399-power-controller";
+ #power-domain-cells = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pd_vio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <RK3399_PD_VIO>;
+
+ pd_vo {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <RK3399_PD_VO>;
+
+ pd_vopb {
+ reg = <RK3399_PD_VOPB>;
+ };
+
+ pd_vopl {
+ reg = <RK3399_PD_VOPL>;
+ };
+ };
+ };
+ };
+
Node of a device using power domains must have a power-domains property,
containing a phandle to the power device node and an index specifying which
power domain to use.
The index should use macros in:
"include/dt-bindings/power/rk3288-power.h" - for rk3288 type power domain.
"include/dt-bindings/power/rk3368-power.h" - for rk3368 type power domain.
+ "include/dt-bindings/power/rk3399-power.h" - for rk3399 type power domain.
Example of the node using power domain:
@@ -65,3 +106,9 @@ Example of the node using power domain:
power-domains = <&power RK3368_PD_GPU_1>;
/* ... */
};
+
+ node {
+ /* ... */
+ power-domains = <&power RK3399_PD_VOPB>;
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/sound/davinci-mcbsp.txt b/Documentation/devicetree/bindings/sound/davinci-mcbsp.txt
new file mode 100644
index 000000000000..55b53e1fd72c
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/davinci-mcbsp.txt
@@ -0,0 +1,51 @@
+Texas Instruments DaVinci McBSP module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This binding describes the "Multi-channel Buffered Serial Port" (McBSP)
+audio interface found in some TI DaVinci processors like the OMAP-L138 or AM180x.
+
+
+Required properties:
+~~~~~~~~~~~~~~~~~~~~
+- compatible :
+ "ti,da850-mcbsp" : for DA850, AM180x and OMAP-L138 platforms
+
+- reg : physical base address and length of the controller memory mapped
+ region(s).
+- reg-names : Should contain:
+ * "mpu" for the main registers (required).
+ * "dat" for the data FIFO (optional).
+
+- dmas: three element list of DMA controller phandles, DMA request line and
+ TC channel ordered triplets.
+- dma-names: identifier string for each DMA request line in the dmas property.
+ These strings correspond 1:1 with the ordered triplets in dmas. The dma
+ identifiers must be "rx" and "tx".
+
+Optional properties:
+~~~~~~~~~~~~~~~~~~~~
+- interrupts : Interrupt numbers for McBSP
+- interrupt-names : Known interrupt names are "rx" and "tx"
+
+- pinctrl-0: Should specify pin control group used for this controller.
+- pinctrl-names: Should contain only one value - "default", for more details
+ please refer to pinctrl-bindings.txt
+
+Example (AM1808):
+~~~~~~~~~~~~~~~~~
+
+mcbsp0: mcbsp@1d10000 {
+ compatible = "ti,da850-mcbsp";
+ pinctrl-names = "default";
+ pinctrl-0 = <&mcbsp0_pins>;
+
+ reg = <0x00110000 0x1000>,
+ <0x00310000 0x1000>;
+ reg-names = "mpu", "dat";
+ interrupts = <97 98>;
+ interrupt-names = "rx", "tx";
+ dmas = <&edma0 3 1
+ &edma0 2 1>;
+ dma-names = "tx", "rx";
+ status = "okay";
+};
diff --git a/Documentation/devicetree/bindings/sound/fsl-sai.txt b/Documentation/devicetree/bindings/sound/fsl-sai.txt
index 044e5d76e2dd..740b467adf7d 100644
--- a/Documentation/devicetree/bindings/sound/fsl-sai.txt
+++ b/Documentation/devicetree/bindings/sound/fsl-sai.txt
@@ -7,8 +7,8 @@ codec/DSP interfaces.
Required properties:
- - compatible : Compatible list, contains "fsl,vf610-sai" or
- "fsl,imx6sx-sai".
+ - compatible : Compatible list, contains "fsl,vf610-sai",
+ "fsl,imx6sx-sai" or "fsl,imx6ul-sai"
- reg : Offset and length of the register set for the device.
@@ -48,6 +48,11 @@ Required properties:
receive data by following their own bit clocks and
frame sync clocks separately.
+Optional properties (for mx6ul):
+
+ - fsl,sai-mclk-direction-output: This is a boolean property. If present,
+ indicates that SAI will output the SAI MCLK clock.
+
Note:
- If both fsl,sai-asynchronous and fsl,sai-synchronous-rx are absent, the
default synchronous mode (sync Rx with Tx) will be used, which means both
diff --git a/Documentation/devicetree/bindings/sound/pcm5102a.txt b/Documentation/devicetree/bindings/sound/pcm5102a.txt
new file mode 100644
index 000000000000..c63ab0b6ee19
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/pcm5102a.txt
@@ -0,0 +1,13 @@
+PCM5102a audio CODECs
+
+These devices do not use I2C or SPI.
+
+Required properties:
+
+ - compatible : set as "ti,pcm5102a"
+
+Examples:
+
+ pcm5102a: pcm5102a {
+ compatible = "ti,pcm5102a";
+ };
diff --git a/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt b/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt
index fa77f874e321..1ad0fe310ff9 100644
--- a/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt
+++ b/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt
@@ -1,7 +1,10 @@
ARM Freescale DSPI controller
Required properties:
-- compatible : "fsl,vf610-dspi", "fsl,ls1021a-v1.0-dspi", "fsl,ls2085a-dspi"
+- compatible : "fsl,vf610-dspi", "fsl,ls1021a-v1.0-dspi",
+ "fsl,ls2085a-dspi"
+ or
+ "fsl,ls2080a-dspi" followed by "fsl,ls2085a-dspi"
- reg : Offset and length of the register set for the device
- interrupts : Should contain SPI controller interrupt
- clocks: from common clock binding: handle to dspi clock.
diff --git a/Documentation/devicetree/bindings/spi/ti_qspi.txt b/Documentation/devicetree/bindings/spi/ti_qspi.txt
index cc8304aa64ac..50b14f6b53a3 100644
--- a/Documentation/devicetree/bindings/spi/ti_qspi.txt
+++ b/Documentation/devicetree/bindings/spi/ti_qspi.txt
@@ -19,6 +19,13 @@ Optional properties:
- syscon-chipselects: Handle to system control region contains QSPI
chipselect register and offset of that register.
+NOTE: TI QSPI controller requires different pinmux and IODelay
+parameters for Mode-0 and Mode-3 operations, which needs to be set up by
+the bootloader (U-Boot). Default configuration only supports Mode-0
+operation. Hence, "spi-cpol" and "spi-cpha" DT properties cannot be
+specified in the slave nodes of TI QSPI controller without appropriate
+modification to bootloader.
+
Example:
For am4372:
diff --git a/Documentation/devicetree/bindings/timer/arm,mps2-timer.txt b/Documentation/devicetree/bindings/timer/arm,mps2-timer.txt
new file mode 100644
index 000000000000..48f84d74edde
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/arm,mps2-timer.txt
@@ -0,0 +1,28 @@
+ARM MPS2 timer
+
+The MPS2 platform has simple general-purpose 32-bit timers.
+
+Required properties:
+- compatible : Should be "arm,mps2-timer"
+- reg : Address and length of the register set
+- interrupts : Reference to the timer interrupt
+
+Required clocking property, has to be one of:
+- clocks : The input clock of the timer
+- clock-frequency : The rate in Hz of the input clock of the ARM MPS2 timer
+
+Examples:
+
+timer1: mps2-timer@40000000 {
+ compatible = "arm,mps2-timer";
+ reg = <0x40000000 0x1000>;
+ interrupts = <8>;
+ clocks = <&sysclk>;
+};
+
+timer2: mps2-timer@40001000 {
+ compatible = "arm,mps2-timer";
+ reg = <0x40001000 0x1000>;
+ interrupts = <9>;
+ clock-frequency = <25000000>;
+};
diff --git a/Documentation/devicetree/bindings/timer/ezchip,nps400-timer.txt b/Documentation/devicetree/bindings/timer/ezchip,nps400-timer.txt
new file mode 100644
index 000000000000..c8c03d700382
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/ezchip,nps400-timer.txt
@@ -0,0 +1,15 @@
+NPS Network Processor
+
+Required properties:
+
+- compatible : should be "ezchip,nps400-timer"
+
+Clocks required for compatible = "ezchip,nps400-timer":
+- clocks : Must contain a single entry describing the clock input
+
+Example:
+
+timer {
+ compatible = "ezchip,nps400-timer";
+ clocks = <&sysclk>;
+};
diff --git a/Documentation/devicetree/bindings/timer/snps,arc-timer.txt b/Documentation/devicetree/bindings/timer/snps,arc-timer.txt
new file mode 100644
index 000000000000..4ef024630d61
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/snps,arc-timer.txt
@@ -0,0 +1,31 @@
+Synopsys ARC Local Timer with Interrupt Capabilities
+- Found on all ARC CPUs (ARC700/ARCHS)
+- Can be optionally programmed to interrupt on Limit
+- Two identical copies TIMER0 and TIMER1 exist in ARC cores and historically
+ TIMER0 used as clockevent provider (true for all ARC cores)
+ TIMER1 used for clocksource (mandatory for ARC700, optional for ARC HS)
+
+Required properties:
+
+- compatible : should be "snps,arc-timer"
+- interrupts : single Interrupt going into parent intc
+ (16 for ARCHS cores, 3 for ARC700 cores)
+- clocks : phandle to the source clock
+
+Optional properties:
+
+- interrupt-parent : phandle to parent intc
+
+Example:
+
+ timer0 {
+ compatible = "snps,arc-timer";
+ interrupts = <3>;
+ interrupt-parent = <&core_intc>;
+ clocks = <&core_clk>;
+ };
+
+ timer1 {
+ compatible = "snps,arc-timer";
+ clocks = <&core_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt b/Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt
new file mode 100644
index 000000000000..b6cd1b3922de
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/snps,archs-gfrc.txt
@@ -0,0 +1,14 @@
+Synopsys ARC Free Running 64-bit Global Timer for ARC HS CPUs
+- clocksource provider for SMP SoC
+
+Required properties:
+
+- compatible : should be "snps,archs-gfrc"
+- clocks : phandle to the source clock
+
+Example:
+
+ gfrc {
+ compatible = "snps,archs-gfrc";
+ clocks = <&core_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/snps,archs-rtc.txt b/Documentation/devicetree/bindings/timer/snps,archs-rtc.txt
new file mode 100644
index 000000000000..47bd7a702f3f
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/snps,archs-rtc.txt
@@ -0,0 +1,14 @@
+Synopsys ARC Free Running 64-bit Local Timer for ARC HS CPUs
+- clocksource provider for UP SoC
+
+Required properties:
+
+- compatible : should be "snps,archs-rtc"
+- clocks : phandle to the source clock
+
+Example:
+
+ rtc {
+ compatible = "snps,archs-rtc";
+ clocks = <&core_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.txt b/Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.txt
new file mode 100644
index 000000000000..d28295a3e55f
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.txt
@@ -0,0 +1,120 @@
+NVIDIA Tegra xHCI controller
+============================
+
+The Tegra xHCI controller supports both USB2 and USB3 interfaces exposed by
+the Tegra XUSB pad controller.
+
+Required properties:
+--------------------
+- compatible: Must be:
+ - Tegra124: "nvidia,tegra124-xusb"
+ - Tegra132: "nvidia,tegra132-xusb", "nvidia,tegra124-xusb"
+ - Tegra210: "nvidia,tegra210-xusb"
+- reg: Must contain the base and length of the xHCI host registers, XUSB FPCI
+ registers and XUSB IPFS registers.
+- reg-names: Must contain the following entries:
+ - "hcd"
+ - "fpci"
+ - "ipfs"
+- interrupts: Must contain the xHCI host interrupt and the mailbox interrupt.
+- clocks: Must contain an entry for each entry in clock-names.
+ See ../clock/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ - xusb_host
+ - xusb_host_src
+ - xusb_falcon_src
+ - xusb_ss
+ - xusb_ss_src
+ - xusb_ss_div2
+ - xusb_hs_src
+ - xusb_fs_src
+ - pll_u_480m
+ - clk_m
+ - pll_e
+- resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+ - xusb_host
+ - xusb_ss
+ - xusb_src
+ Note that xusb_src is the shared reset for xusb_{ss,hs,fs,falcon,host}_src.
+- nvidia,xusb-padctl: phandle to the XUSB pad controller that is used to
+ configure the USB pads used by the XHCI controller
+
+For Tegra124 and Tegra132:
+- avddio-pex-supply: PCIe/USB3 analog logic power supply. Must supply 1.05 V.
+- dvddio-pex-supply: PCIe/USB3 digital logic power supply. Must supply 1.05 V.
+- avdd-usb-supply: USB controller power supply. Must supply 3.3 V.
+- avdd-pll-utmip-supply: UTMI PLL power supply. Must supply 1.8 V.
+- avdd-pll-erefe-supply: PLLE reference PLL power supply. Must supply 1.05 V.
+- avdd-usb-ss-pll-supply: PCIe/USB3 PLL power supply. Must supply 1.05 V.
+- hvdd-usb-ss-supply: High-voltage PCIe/USB3 power supply. Must supply 3.3 V.
+- hvdd-usb-ss-pll-e-supply: High-voltage PLLE power supply. Must supply 3.3 V.
+
+For Tegra210:
+- dvddio-pex-supply: PCIe/USB3 digital logic power supply. Must supply 1.05 V.
+- hvddio-pex-supply: High-voltage PCIe/USB3 power supply. Must supply 1.8 V.
+- avdd-usb-supply: USB controller power supply. Must supply 3.3 V.
+- avdd-pll-utmip-supply: UTMI PLL power supply. Must supply 1.8 V.
+- avdd-pll-uerefe-supply: PLLE reference PLL power supply. Must supply 1.05 V.
+- dvdd-pex-pll-supply: PCIe/USB3 PLL power supply. Must supply 1.05 V.
+- hvdd-pex-pll-e-supply: High-voltage PLLE power supply. Must supply 1.8 V.
+
+Optional properties:
+--------------------
+- phys: Must contain an entry for each entry in phy-names.
+ See ../phy/phy-bindings.txt for details.
+- phy-names: Should include an entry for each PHY used by the controller. The
+ following PHYs are available:
+ - Tegra124: usb2-0, usb2-1, usb2-2, hsic-0, hsic-1, usb3-0, usb3-1
+ - Tegra132: usb2-0, usb2-1, usb2-2, hsic-0, hsic-1, usb3-0, usb3-1
+ - Tegra210: usb2-0, usb2-1, usb2-2, usb2-3, hsic-0, usb3-0, usb3-1, usb3-2,
+ usb3-3
+
+Example:
+--------
+
+ usb@0,70090000 {
+ compatible = "nvidia,tegra124-xusb";
+ reg = <0x0 0x70090000 0x0 0x8000>,
+ <0x0 0x70098000 0x0 0x1000>,
+ <0x0 0x70099000 0x0 0x1000>;
+ reg-names = "hcd", "fpci", "ipfs";
+
+ interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>;
+
+ clocks = <&tegra_car TEGRA124_CLK_XUSB_HOST>,
+ <&tegra_car TEGRA124_CLK_XUSB_HOST_SRC>,
+ <&tegra_car TEGRA124_CLK_XUSB_FALCON_SRC>,
+ <&tegra_car TEGRA124_CLK_XUSB_SS>,
+ <&tegra_car TEGRA124_CLK_XUSB_SS_DIV2>,
+ <&tegra_car TEGRA124_CLK_XUSB_SS_SRC>,
+ <&tegra_car TEGRA124_CLK_XUSB_HS_SRC>,
+ <&tegra_car TEGRA124_CLK_XUSB_FS_SRC>,
+ <&tegra_car TEGRA124_CLK_PLL_U_480M>,
+ <&tegra_car TEGRA124_CLK_CLK_M>,
+ <&tegra_car TEGRA124_CLK_PLL_E>;
+ clock-names = "xusb_host", "xusb_host_src", "xusb_falcon_src",
+ "xusb_ss", "xusb_ss_div2", "xusb_ss_src",
+ "xusb_hs_src", "xusb_fs_src", "pll_u_480m",
+ "clk_m", "pll_e";
+ resets = <&tegra_car 89>, <&tegra_car 156>, <&tegra_car 143>;
+ reset-names = "xusb_host", "xusb_ss", "xusb_src";
+
+ nvidia,xusb-padctl = <&padctl>;
+
+ phys = <&{/padctl@0,7009f000/pads/usb2/usb2-1}>, /* mini-PCIe USB */
+ <&{/padctl@0,7009f000/pads/usb2/usb2-2}>, /* USB A */
+ <&{/padctl@0,7009f000/pads/pcie/pcie-0}>; /* USB A */
+ phy-names = "usb2-1", "usb2-2", "usb3-0";
+
+ avddio-pex-supply = <&vdd_1v05_run>;
+ dvddio-pex-supply = <&vdd_1v05_run>;
+ avdd-usb-supply = <&vdd_3v3_lp0>;
+ avdd-pll-utmip-supply = <&vddio_1v8>;
+ avdd-pll-erefe-supply = <&avdd_1v05_run>;
+ avdd-usb-ss-pll-supply = <&vdd_1v05_run>;
+ hvdd-usb-ss-supply = <&vdd_3v3_lp0>;
+ hvdd-usb-ss-pll-e-supply = <&vdd_3v3_lp0>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt b/Documentation/devicetree/bindings/usb/usb-xhci.txt
index 6a17aa85c4d5..966885c636d0 100644
--- a/Documentation/devicetree/bindings/usb/usb-xhci.txt
+++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt
@@ -4,6 +4,7 @@ Required properties:
- compatible: should be one or more of
- "generic-xhci" for generic XHCI device
+ - "marvell,armada3700-xhci" for Armada 37xx SoCs
- "marvell,armada-375-xhci" for Armada 375 SoCs
- "marvell,armada-380-xhci" for Armada 38x SoCs
- "renesas,xhci-r8a7790" for r8a7790 SoC
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 86740d4a270d..316412dc7913 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -16,6 +16,7 @@ al Annapurna Labs
allwinner Allwinner Technology Co., Ltd.
alphascale AlphaScale Integrated Circuits Systems, Inc.
altr Altera Corp.
+amazon Amazon.com, Inc.
amcc Applied Micro Circuits Corporation (APM, formally AMCC)
amd Advanced Micro Devices (AMD), Inc.
amlogic Amlogic, Inc.
@@ -29,6 +30,7 @@ arm ARM Ltd.
armadeus ARMadeus Systems SARL
artesyn Artesyn Embedded Technologies Inc.
asahi-kasei Asahi Kasei Corp.
+aspeed ASPEED Technology Inc.
atlas Atlas Scientific LLC
atmel Atmel Corporation
auo AU Optronics Corporation
@@ -70,6 +72,8 @@ digilent Diglent, Inc.
dlg Dialog Semiconductor
dlink D-Link Corporation
dmo Data Modul AG
+dptechnics DPTechnics
+dragino Dragino Technology Co., Limited
ea Embedded Artists AB
ebv EBV Elektronik
edt Emerging Display Technologies
@@ -86,11 +90,13 @@ eukrea Eukréa Electromatique
everest Everest Semiconductor Co. Ltd.
everspin Everspin Technologies, Inc.
excito Excito
+ezchip EZchip Semiconductor
fcs Fairchild Semiconductor
firefly Firefly
focaltech FocalTech Systems Co.,Ltd
fsl Freescale Semiconductor
ge General Electric Company
+geekbuying GeekBuying
GEFanuc GE Fanuc Intelligent Platforms Embedded Systems, Inc.
gef GE Fanuc Intelligent Platforms Embedded Systems, Inc.
geniatech Geniatech, Inc.
@@ -152,6 +158,7 @@ mitsubishi Mitsubishi Electric Corporation
mosaixtech Mosaix Technologies, Inc.
moxa Moxa
mpl MPL AG
+mqmaker mqmaker Inc.
msi Micro-Star International Co. Ltd.
mti Imagination Technologies Ltd. (formerly MIPS Technologies Inc.)
mundoreader Mundo Reader S.L.
@@ -171,12 +178,14 @@ nvidia NVIDIA
nxp NXP Semiconductors
okaya Okaya Electric America, Inc.
olimex OLIMEX Ltd.
+onion Onion Corporation
onnn ON Semiconductor Corp.
opencores OpenCores.org
option Option NV
ortustech Ortus Technology Co., Ltd.
ovti OmniVision Technologies
ORCL Oracle Corporation
+oxsemi Oxford Semiconductor, Ltd.
panasonic Panasonic Corporation
parade Parade Technologies Inc.
pericom Pericom Technology Inc.
@@ -250,6 +259,7 @@ tplink TP-LINK Technologies Co., Ltd.
tronfy Tronfy
tronsmart Tronsmart
truly Truly Semiconductors Limited
+tyan Tyan Computer Corporation
upisemi uPI Semiconductor Corp.
urt United Radiant Technology Corporation
usi Universal Scientific Industrial Co., Ltd.
@@ -259,6 +269,7 @@ via VIA Technologies, Inc.
virtio Virtual I/O Device Specification, developed by the OASIS consortium
vivante Vivante Corporation
voipac Voipac Technologies s.r.o.
+wd Western Digital Corp.
wexler Wexler
winbond Winbond Electronics corp.
wlf Wolfson Microelectronics
diff --git a/Documentation/devicetree/bindings/watchdog/microchip,pic32-dmt.txt b/Documentation/devicetree/bindings/watchdog/microchip,pic32-dmt.txt
new file mode 100644
index 000000000000..852f694f3177
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/microchip,pic32-dmt.txt
@@ -0,0 +1,19 @@
+* Microchip PIC32 Deadman Timer
+
+The deadman timer is used to reset the processor in the event of a software
+malfunction. It is a free-running instruction fetch timer, which is clocked
+whenever an instruction fetch occurs until a count match occurs.
+
+Required properties:
+- compatible: must be "microchip,pic32mzda-dmt".
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- clocks: phandle of parent clock (should be &PBCLK7).
+
+Example:
+
+ watchdog@1f800a00 {
+ compatible = "microchip,pic32mzda-dmt";
+ reg = <0x1f800a00 0x80>;
+ clocks = <&PBCLK7>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/microchip,pic32-wdt.txt b/Documentation/devicetree/bindings/watchdog/microchip,pic32-wdt.txt
new file mode 100644
index 000000000000..d1401030e75c
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/microchip,pic32-wdt.txt
@@ -0,0 +1,18 @@
+* Microchip PIC32 Watchdog Timer
+
+When enabled, the watchdog peripheral can be used to reset the device if the
+WDT is not cleared periodically in software.
+
+Required properties:
+- compatible: must be "microchip,pic32mzda-wdt".
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- clocks: phandle of source clk. should be <&LPRC> clk.
+
+Example:
+
+ watchdog@1f800800 {
+ compatible = "microchip,pic32mzda-wdt";
+ reg = <0x1f800800 0x200>;
+ clocks = <&LPRC>;
+ };
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 25e8f3ea3b80..7281fb4b4316 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -331,6 +331,8 @@ PHY
PINCTRL
devm_pinctrl_get()
devm_pinctrl_put()
+ devm_pinctrl_register()
+ devm_pinctrl_unregister()
PWM
devm_pwm_get()
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 619af9bfdcb3..75eea7ce3d7c 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -194,7 +194,7 @@ prototypes:
void (*invalidatepage) (struct page *, unsigned int, unsigned int);
int (*releasepage) (struct page *, int);
void (*freepage)(struct page *);
- int (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset);
+ int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
int (*migratepage)(struct address_space *, struct page *, struct page *);
int (*launder_page)(struct page *);
int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
diff --git a/Documentation/filesystems/cramfs.txt b/Documentation/filesystems/cramfs.txt
index 31f53f0ab957..4006298f6707 100644
--- a/Documentation/filesystems/cramfs.txt
+++ b/Documentation/filesystems/cramfs.txt
@@ -38,7 +38,7 @@ the update lasts only as long as the inode is cached in memory, after
which the timestamp reverts to 1970, i.e. moves backwards in time.
Currently, cramfs must be written and read with architectures of the
-same endianness, and can be read only by kernels with PAGE_CACHE_SIZE
+same endianness, and can be read only by kernels with PAGE_SIZE
== 4096. At least the latter of these is a bug, but it hasn't been
decided what the best fix is. For the moment if you have larger pages
you can just change the #define in mkcramfs.c, so long as you don't
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index f1b87d8aa2da..46f3bb7a02f5 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -525,3 +525,56 @@ in your dentry operations instead.
set_delayed_call() where it used to set *cookie.
->put_link() is gone - just give the destructor to set_delayed_call()
in ->get_link().
+--
+[mandatory]
+ ->getxattr() and xattr_handler.get() get dentry and inode passed separately.
+ dentry might be yet to be attached to inode, so do _not_ use its ->d_inode
+ in the instances. Rationale: !@#!@# security_d_instantiate() needs to be
+ called before we attach dentry to inode.
+--
+[mandatory]
+ symlinks are no longer the only inodes that do *not* have i_bdev/i_cdev/
+ i_pipe/i_link union zeroed out at inode eviction. As a result, you can't
+ assume that non-NULL value in ->i_link at ->destroy_inode() implies that
+ it's a symlink. Checking ->i_mode is really needed now. In-tree we had
+ to fix shmem_destroy_callback() that used to take that kind of shortcut;
+ watch out, since that shortcut is no longer valid.
+--
+[mandatory]
+ ->i_mutex is replaced with ->i_rwsem now. inode_lock() et.al. work as
+ they used to - they just take it exclusive. However, ->lookup() may be
+ called with parent locked shared. Its instances must not
+ * use d_instantiate() and d_rehash() separately - use d_add() or
+ d_splice_alias() instead.
+ * use d_rehash() alone - call d_add(new_dentry, NULL) instead.
+ * in the unlikely case when (read-only) access to filesystem
+ data structures needs exclusion for some reason, arrange it
+ yourself. None of the in-tree filesystems needed that.
+ * rely on ->d_parent and ->d_name not changing after dentry has
+ been fed to d_add() or d_splice_alias(). Again, none of the
+ in-tree instances relied upon that.
+ We are guaranteed that lookups of the same name in the same directory
+ will not happen in parallel ("same" in the sense of your ->d_compare()).
+ Lookups on different names in the same directory can and do happen in
+ parallel now.
+--
+[recommended]
+ ->iterate_shared() is added; it's a parallel variant of ->iterate().
+ Exclusion on struct file level is still provided (as well as that
+ between it and lseek on the same struct file), but if your directory
+ has been opened several times, you can get these called in parallel.
+ Exclusion between that method and all directory-modifying ones is
+ still provided, of course.
+
+ Often enough ->iterate() can serve as ->iterate_shared() without any
+ changes - it is a read-only operation, after all. If you have any
+ per-inode or per-dentry in-core data structures modified by ->iterate(),
+ you might need something to serialize the access to them. If you
+ do dcache pre-seeding, you'll need to switch to d_alloc_parallel() for
+ that; look for in-tree examples.
+
+ Old method is only used if the new one is absent; eventually it will
+ be removed. Switch while you still can; the old one won't stay.
+--
+[mandatory]
+ ->atomic_open() calls without O_CREAT may happen in parallel.
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index d392e1505f17..d9c11d25bf02 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -60,7 +60,7 @@ size: The limit of allocated bytes for this tmpfs instance. The
default is half of your physical RAM without swap. If you
oversize your tmpfs instances the machine will deadlock
since the OOM handler will not be able to free that memory.
-nr_blocks: The same as size, but in blocks of PAGE_CACHE_SIZE.
+nr_blocks: The same as size, but in blocks of PAGE_SIZE.
nr_inodes: The maximum number of inodes for this instance. The default
is half of the number of your physical RAM pages, or (on a
machine with highmem) the number of lowmem RAM pages,
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index b02a7d598258..c61a223ef3ff 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -591,7 +591,7 @@ struct address_space_operations {
void (*invalidatepage) (struct page *, unsigned int, unsigned int);
int (*releasepage) (struct page *, int);
void (*freepage)(struct page *);
- ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset);
+ ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct page *, struct page *);
int (*launder_page) (struct page *);
@@ -708,9 +708,9 @@ struct address_space_operations {
from the address space. This generally corresponds to either a
truncation, punch hole or a complete invalidation of the address
space (in the latter case 'offset' will always be 0 and 'length'
- will be PAGE_CACHE_SIZE). Any private data associated with the page
+ will be PAGE_SIZE). Any private data associated with the page
should be updated to reflect this truncation. If offset is 0 and
- length is PAGE_CACHE_SIZE, then the private data should be released,
+ length is PAGE_SIZE, then the private data should be released,
because the page must be able to be completely discarded. This may
be done by calling the ->releasepage function, but in this case the
release MUST succeed.
diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt
index bbeec415f406..6cb35a78eff4 100644
--- a/Documentation/gpio/driver.txt
+++ b/Documentation/gpio/driver.txt
@@ -68,6 +68,103 @@ control callbacks) if it is expected to call GPIO APIs from atomic context
on -RT (inside hard IRQ handlers and similar contexts). Normally this should
not be required.
+
+GPIOs with open drain/source support
+------------------------------------
+
+Open drain (CMOS) or open collector (TTL) means the line is not actively driven
+high: instead you provide the drain/collector as output, so when the transistor
+is not open, it will present a high-impedance (tristate) to the external rail.
+
+
+ CMOS CONFIGURATION TTL CONFIGURATION
+
+ ||--- out +--- out
+ in ----|| |/
+ ||--+ in ----|
+ | |\
+ GND GND
+
+This configuration is normally used as a way to achieve one of two things:
+
+- Level-shifting: to reach a logical level higher than that of the silicon
+ where the output resides.
+
+- inverse wire-OR on an I/O line, for example a GPIO line, making it possible
+ for any driving stage on the line to drive it low even if any other output
+ to the same line is simultaneously driving it high. A special case of this
+ is driving the SCL and SDA lines of an I2C bus, which is by definition a
+ wire-OR bus.
+
+Both usecases require that the line be equipped with a pull-up resistor. This
+resistor will make the line tend to high level unless one of the transistors on
+the rail actively pulls it down.
+
+The level on the line will go as high as the VDD on the pull-up resistor, which
+may be higher than the level supported by the transistor, achieving a
+level-shift to the higher VDD.
+
+Integrated electronics often have an output driver stage in the form of a CMOS
+"totem-pole" with one N-MOS and one P-MOS transistor where one of them drives
+the line high and one of them drives the line low. This is called a push-pull
+output. The "totem-pole" looks like so:
+
+ VDD
+ |
+ OD ||--+
+ +--/ ---o|| P-MOS-FET
+ | ||--+
+IN --+ +----- out
+ | ||--+
+ +--/ ----|| N-MOS-FET
+ OS ||--+
+ |
+ GND
+
+The desired output signal (e.g. coming directly from some GPIO output register)
+arrives at IN. The switches named "OD" and "OS" are normally closed, creating
+a push-pull circuit.
+
+Consider the little "switches" named "OD" and "OS" that enable/disable the
+P-MOS or N-MOS transistor right after the split of the input. As you can see,
+either transistor will go totally numb if this switch is open. The totem-pole
+is then halved and gives high impedance instead of actively driving the line
+high or low respectively. That is usually how software-controlled open
+drain/source works.
+
+Some GPIO hardware comes in open drain / open source configuration. Some are
+hard-wired lines that will only support open drain or open source no matter
+what: there is only one transistor there. Some are software-configurable:
+by flipping a bit in a register the output can be configured as open drain
+or open source, in practice by flicking open the switches labeled "OD" and "OS"
+in the drawing above.
+
+By disabling the P-MOS transistor, the output can be driven between GND and
+high impedance (open drain), and by disabling the N-MOS transistor, the output
+can be driven between VDD and high impedance (open source). In the first case,
+a pull-up resistor is needed on the outgoing rail to complete the circuit, and
+in the second case, a pull-down resistor is needed on the rail.
+
+Hardware that supports open drain or open source or both, can implement a
+special callback in the gpio_chip: .set_single_ended() that takes an enum flag
+telling whether to configure the line as open drain, open source or push-pull.
+This will happen in response to the GPIO_OPEN_DRAIN or GPIO_OPEN_SOURCE flag
+set in the machine file, or coming from other hardware descriptions.
+
+If this state can not be configured in hardware, i.e. if the GPIO hardware does
+not support open drain/open source in hardware, the GPIO library will instead
+use a trick: when a line is set as output, if the line is flagged as open
+drain, and the IN output value is low, it will be driven low as usual. But
+if the IN output value is set to high, it will instead *NOT* be driven high,
+instead it will be switched to input, as input mode is high impedance, thus
+achieving an "open drain emulation" of sorts: electrically the behaviour will
+be identical, with the exception of possible hardware glitches when switching
+the mode of the line.
+
+For open source configuration the same principle is used, just that instead
+of actively driving the line low, it is set to input.
+
+
GPIO drivers providing IRQs
---------------------------
It is custom that GPIO drivers (GPIO chips) are also providing interrupts,
diff --git a/Documentation/hwmon/fam15h_power b/Documentation/hwmon/fam15h_power
index e2b1b69eebea..fb594c281c46 100644
--- a/Documentation/hwmon/fam15h_power
+++ b/Documentation/hwmon/fam15h_power
@@ -10,14 +10,22 @@ Supported chips:
Datasheets:
BIOS and Kernel Developer's Guide (BKDG) For AMD Family 15h Processors
BIOS and Kernel Developer's Guide (BKDG) For AMD Family 16h Processors
+ AMD64 Architecture Programmer's Manual Volume 2: System Programming
Author: Andreas Herrmann <herrmann.der.user@googlemail.com>
Description
-----------
+1) Processor TDP (Thermal design power)
+
+Given a fixed frequency and voltage, the power consumption of a
+processor varies based on the workload being executed. Derated power
+is the power consumed when running a specific application. Thermal
+design power (TDP) is an example of derated power.
+
This driver permits reading of registers providing power information
-of AMD Family 15h and 16h processors.
+of AMD Family 15h and 16h processors via TDP algorithm.
For AMD Family 15h and 16h processors the following power values can
be calculated using different processor northbridge function
@@ -37,3 +45,58 @@ This driver provides ProcessorPwrWatts and CurrPwrWatts:
On multi-node processors the calculated value is for the entire
package and not for a single node. Thus the driver creates sysfs
attributes only for internal node0 of a multi-node processor.
+
+2) Accumulated Power Mechanism
+
+This driver also introduces an algorithm that should be used to
+calculate the average power consumed by a processor during a
+measurement interval Tm. The feature of accumulated power mechanism is
+indicated by CPUID Fn8000_0007_EDX[12].
+
+* Tsample: compute unit power accumulator sample period
+* Tref: the PTSC counter period
+* PTSC: performance timestamp counter
+* N: the ratio of compute unit power accumulator sample period to the
+ PTSC period
+* Jmax: max compute unit accumulated power which is indicated by
+ MaxCpuSwPwrAcc MSR C001007b
+* Jx/Jy: compute unit accumulated power which is indicated by
+ CpuSwPwrAcc MSR C001007a
+* Tx/Ty: the value of performance timestamp counter which is indicated
+ by CU_PTSC MSR C0010280
+* PwrCPUave: CPU average power
+
+i. Determine the ratio of Tsample to Tref by executing CPUID Fn8000_0007.
+ N = value of CPUID Fn8000_0007_ECX[CpuPwrSampleTimeRatio[15:0]].
+
+ii. Read the full range of the cumulative energy value from the new
+MSR MaxCpuSwPwrAcc.
+ Jmax = value returned.
+iii. At time x, SW reads CpuSwPwrAcc MSR and samples the PTSC.
+ Jx = value read from CpuSwPwrAcc and Tx = value read from
+PTSC.
+
+iv. At time y, SW reads CpuSwPwrAcc MSR and samples the PTSC.
+ Jy = value read from CpuSwPwrAcc and Ty = value read from
+PTSC.
+
+v. Calculate the average power consumption for a compute unit over
+time period (y-x). Unit of result is uWatt.
+ if (Jy < Jx) // Rollover has occurred
+ Jdelta = (Jy + Jmax) - Jx
+ else
+ Jdelta = Jy - Jx
+ PwrCPUave = N * Jdelta * 1000 / (Ty - Tx)
+
+This driver provides PwrCPUave and interval (default is 10 milliseconds
+and maximum is 1 second):
+* power1_average (PwrCPUave)
+* power1_average_interval (Interval)
+
+The power1_average_interval can be updated at /etc/sensors3.conf file
+as below:
+
+chip "fam15h_power-*"
+ set power1_average_interval 0.01
+
+Then save it with "sensors -s".
diff --git a/Documentation/hwmon/it87 b/Documentation/hwmon/it87
index 733296d65449..fff6f6bf55bc 100644
--- a/Documentation/hwmon/it87
+++ b/Documentation/hwmon/it87
@@ -9,6 +9,9 @@ Supported chips:
* IT8620E
Prefix: 'it8620'
Addresses scanned: from Super I/O config space (8 I/O ports)
+ * IT8628E
+ Prefix: 'it8628'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
Datasheet: Not publicly available
* IT8705F
Prefix: 'it87'
@@ -114,8 +117,8 @@ motherboard models.
Description
-----------
-This driver implements support for the IT8603E, IT8620E, IT8623E, IT8705F,
-IT8712F, IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8732F,
+This driver implements support for the IT8603E, IT8620E, IT8623E, IT8628E,
+IT8705F, IT8712F, IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8732F,
IT8758E, IT8771E, IT8772E, IT8781F, IT8782F, IT8783E/F, IT8786E, IT8790E, and
SiS950 chips.
@@ -158,8 +161,8 @@ The IT8603E/IT8623E is a custom design, hardware monitoring part is similar to
IT8728F. It only supports 3 fans, 16-bit fan mode, and the full speed mode
of the fan is not supported (value 0 of pwmX_enable).
-The IT8620E is another custom design, hardware monitoring part is similar to
-IT8728F. It only supports 16-bit fan mode.
+The IT8620E and IT8628E are custom designs, hardware monitoring part is similar
+to IT8728F. They only support 16-bit fan mode. Both chips support up to 6 fans.
The IT8790E supports up to 3 fans. 16-bit fan mode is always enabled.
@@ -187,8 +190,8 @@ of 0.016 volt. IT8603E, IT8721F/IT8758E and IT8728F can measure between 0 and
2.8 volts with a resolution of 0.0109 volt. The battery voltage in8 does not
have limit registers.
-On the IT8603E, IT8721F/IT8758E, IT8732F, IT8781F, IT8782F, and IT8783E/F, some
-voltage inputs are internal and scaled inside the chip:
+On the IT8603E, IT8620E, IT8628E, IT8721F/IT8758E, IT8732F, IT8781F, IT8782F,
+and IT8783E/F, some voltage inputs are internal and scaled inside the chip:
* in3 (optional)
* in7 (optional for IT8781F, IT8782F, and IT8783E/F)
* in8 (always)
diff --git a/Documentation/hwmon/max31722 b/Documentation/hwmon/max31722
new file mode 100644
index 000000000000..090da84538c8
--- /dev/null
+++ b/Documentation/hwmon/max31722
@@ -0,0 +1,34 @@
+Kernel driver max31722
+======================
+
+Supported chips:
+ * Maxim Integrated MAX31722
+ Prefix: 'max31722'
+ ACPI ID: MAX31722
+ Addresses scanned: -
+ Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX31722-MAX31723.pdf
+ * Maxim Integrated MAX31723
+ Prefix: 'max31723'
+ ACPI ID: MAX31723
+ Addresses scanned: -
+ Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX31722-MAX31723.pdf
+
+Author: Tiberiu Breana <tiberiu.a.breana@intel.com>
+
+Description
+-----------
+
+This driver adds support for the Maxim Integrated MAX31722/MAX31723 thermometers
+and thermostats running over an SPI interface.
+
+Usage Notes
+-----------
+
+This driver uses ACPI to auto-detect devices. See ACPI IDs in the above section.
+
+Sysfs entries
+-------------
+
+The following attribute is supported:
+
+temp1_input Measured temperature. Read-only.
diff --git a/Documentation/i2c/i2c-topology b/Documentation/i2c/i2c-topology
new file mode 100644
index 000000000000..e0aefeece551
--- /dev/null
+++ b/Documentation/i2c/i2c-topology
@@ -0,0 +1,370 @@
+I2C topology
+============
+
+There are a couple of reasons for building more complex i2c topologies
+than a straight-forward i2c bus with one adapter and one or more devices.
+
+1. A mux may be needed on the bus to prevent address collisions.
+
+2. The bus may be accessible from some external bus master, and arbitration
+ may be needed to determine if it is ok to access the bus.
+
+3. A device (particularly RF tuners) may want to avoid the digital noise
+ from the i2c bus, at least most of the time, and sits behind a gate
+ that has to be operated before the device can be accessed.
+
+Etc
+
+These constructs are represented as i2c adapter trees by Linux, where
+each adapter has a parent adapter (except the root adapter) and zero or
+more child adapters. The root adapter is the actual adapter that issues
+i2c transfers, and all adapters with a parent are part of an "i2c-mux"
+object (quoted, since it can also be an arbitrator or a gate).
+
+Depending on the particular mux driver, something happens when there is
+an i2c transfer on one of its child adapters. The mux driver can
+obviously operate a mux, but it can also do arbitration with an external
+bus master or open a gate. The mux driver has two operations for this,
+select and deselect. select is called before the transfer and (the
+optional) deselect is called after the transfer.
+
+
+Locking
+=======
+
+There are two variants of locking available to i2c muxes, they can be
+mux-locked or parent-locked muxes. As is evident from below, it can be
+useful to know if a mux is mux-locked or if it is parent-locked. The
+following list was correct at the time of writing:
+
+In drivers/i2c/muxes/
+i2c-arb-gpio-challenge Parent-locked
+i2c-mux-gpio Normally parent-locked, mux-locked iff
+ all involved gpio pins are controlled by the
+ same i2c root adapter that they mux.
+i2c-mux-pca9541 Parent-locked
+i2c-mux-pca954x Parent-locked
+i2c-mux-pinctrl Normally parent-locked, mux-locked iff
+ all involved pinctrl devices are controlled
+ by the same i2c root adapter that they mux.
+i2c-mux-reg Parent-locked
+
+In drivers/iio/
+imu/inv_mpu6050/ Mux-locked
+
+In drivers/media/
+dvb-frontends/m88ds3103 Parent-locked
+dvb-frontends/rtl2830 Parent-locked
+dvb-frontends/rtl2832 Mux-locked
+dvb-frontends/si2168 Mux-locked
+usb/cx231xx/ Parent-locked
+
+
+Mux-locked muxes
+----------------
+
+Mux-locked muxes do not lock the entire parent adapter during the
+full select-transfer-deselect transaction, only the muxes on the parent
+adapter are locked. Mux-locked muxes are mostly interesting if the
+select and/or deselect operations must use i2c transfers to complete
+their tasks. Since the parent adapter is not fully locked during the
+full transaction, unrelated i2c transfers may interleave the different
+stages of the transaction. This has the benefit that the mux driver
+may be easier and cleaner to implement, but it has some caveats.
+
+ML1. If you build a topology with a mux-locked mux being the parent
+ of a parent-locked mux, this might break the expectation from the
+ parent-locked mux that the root adapter is locked during the
+ transaction.
+
+ML2. It is not safe to build arbitrary topologies with two (or more)
+ mux-locked muxes that are not siblings, when there are address
+ collisions between the devices on the child adapters of these
+ non-sibling muxes.
+
+ I.e. the select-transfer-deselect transaction targeting e.g. device
+ address 0x42 behind mux-one may be interleaved with a similar
+ operation targeting device address 0x42 behind mux-two. The
+ intention with such a topology would in this hypothetical example
+ be that mux-one and mux-two should not be selected simultaneously,
+ but mux-locked muxes do not guarantee that in all topologies.
+
+ML3. A mux-locked mux cannot be used by a driver for auto-closing
+ gates/muxes, i.e. something that closes automatically after a given
+ number (one, in most cases) of i2c transfers. Unrelated i2c transfers
+ may creep in and close prematurely.
+
+ML4. If any non-i2c operation in the mux driver changes the i2c mux state,
+ the driver has to lock the root adapter during that operation.
+ Otherwise garbage may appear on the bus as seen from devices
+ behind the mux, when an unrelated i2c transfer is in flight during
+ the non-i2c mux-changing operation.
+
+
+Mux-locked Example
+------------------
+
+ .----------. .--------.
+ .--------. | mux- |-----| dev D1 |
+ | root |--+--| locked | '--------'
+ '--------' | | mux M1 |--. .--------.
+ | '----------' '--| dev D2 |
+ | .--------. '--------'
+ '--| dev D3 |
+ '--------'
+
+When there is an access to D1, this happens:
+
+ 1. Someone issues an i2c-transfer to D1.
+ 2. M1 locks muxes on its parent (the root adapter in this case).
+ 3. M1 calls ->select to ready the mux.
+ 4. M1 (presumably) does some i2c-transfers as part of its select.
+ These transfers are normal i2c-transfers that lock the parent
+ adapter.
+ 5. M1 feeds the i2c-transfer from step 1 to its parent adapter as a
+ normal i2c-transfer that locks the parent adapter.
+ 6. M1 calls ->deselect, if it has one.
+ 7. Same rules as in step 4, but for ->deselect.
+ 8. M1 unlocks muxes on its parent.
+
+This means that accesses to D2 are locked out for the full duration
+of the entire operation. But accesses to D3 are possibly interleaved
+at any point.
+
+
+Parent-locked muxes
+-------------------
+
+Parent-locked muxes lock the parent adapter during the full select-
+transfer-deselect transaction. The implication is that the mux driver
+has to ensure that any and all i2c transfers through that parent
+adapter during the transaction are unlocked i2c transfers (using e.g.
+__i2c_transfer), or a deadlock will follow. There are a couple of
+caveats.
+
+PL1. If you build a topology with a parent-locked mux being the child
+ of another mux, this might break a possible assumption from the
+ child mux that the root adapter is unused between its select op
+ and the actual transfer (e.g. if the child mux is auto-closing
+ and the parent mux issues i2c-transfers as part of its select).
+ This is especially the case if the parent mux is mux-locked, but
+ it may also happen if the parent mux is parent-locked.
+
+PL2. If select/deselect calls out to other subsystems such as gpio,
+ pinctrl, regmap or iio, it is essential that any i2c transfers
+ caused by these subsystems are unlocked. This can be convoluted to
+ accomplish, maybe even impossible if an acceptably clean solution
+ is sought.
+
+
+Parent-locked Example
+---------------------
+
+ .----------. .--------.
+ .--------. | parent- |-----| dev D1 |
+ | root |--+--| locked | '--------'
+ '--------' | | mux M1 |--. .--------.
+ | '----------' '--| dev D2 |
+ | .--------. '--------'
+ '--| dev D3 |
+ '--------'
+
+When there is an access to D1, this happens:
+
+ 1. Someone issues an i2c-transfer to D1.
+ 2. M1 locks muxes on its parent (the root adapter in this case).
+ 3. M1 locks its parent adapter.
+ 4. M1 calls ->select to ready the mux.
+ 5. If M1 does any i2c-transfers (on this root adapter) as part of
+ its select, those transfers must be unlocked i2c-transfers so
+ that they do not deadlock the root adapter.
+ 6. M1 feeds the i2c-transfer from step 1 to the root adapter as an
+ unlocked i2c-transfer, so that it does not deadlock the parent
+ adapter.
+ 7. M1 calls ->deselect, if it has one.
+ 8. Same rules as in step 5, but for ->deselect.
+ 9. M1 unlocks its parent adapter.
+10. M1 unlocks muxes on its parent.
+
+
+This means that accesses to both D2 and D3 are locked out for the full
+duration of the entire operation.
+
+
+Complex Examples
+================
+
+Parent-locked mux as parent of parent-locked mux
+------------------------------------------------
+
+This is a useful topology, but it can be bad.
+
+ .----------. .----------. .--------.
+ .--------. | parent- |-----| parent- |-----| dev D1 |
+ | root |--+--| locked | | locked | '--------'
+ '--------' | | mux M1 |--. | mux M2 |--. .--------.
+ | '----------' | '----------' '--| dev D2 |
+ | .--------. | .--------. '--------'
+ '--| dev D4 | '--| dev D3 |
+ '--------' '--------'
+
+When any device is accessed, all other devices are locked out for
+the full duration of the operation (both muxes lock their parent,
+and specifically when M2 requests its parent to lock, M1 passes
+the buck to the root adapter).
+
+This topology is bad if M2 is an auto-closing mux and M1->select
+issues any unlocked i2c transfers on the root adapter that may leak
+through and be seen by the M2 adapter, thus closing M2 prematurely.
+
+
+Mux-locked mux as parent of mux-locked mux
+------------------------------------------
+
+This is a good topology.
+
+ .----------. .----------. .--------.
+ .--------. | mux- |-----| mux- |-----| dev D1 |
+ | root |--+--| locked | | locked | '--------'
+ '--------' | | mux M1 |--. | mux M2 |--. .--------.
+ | '----------' | '----------' '--| dev D2 |
+ | .--------. | .--------. '--------'
+ '--| dev D4 | '--| dev D3 |
+ '--------' '--------'
+
+When device D1 is accessed, accesses to D2 are locked out for the
+full duration of the operation (muxes on the top child adapter of M1
+are locked). But accesses to D3 and D4 are possibly interleaved at
+any point. Accesses to D3 lock out D1 and D2, but accesses to D4
+are still possibly interleaved.
+
+
+Mux-locked mux as parent of parent-locked mux
+---------------------------------------------
+
+This is probably a bad topology.
+
+ .----------. .----------. .--------.
+ .--------. | mux- |-----| parent- |-----| dev D1 |
+ | root |--+--| locked | | locked | '--------'
+ '--------' | | mux M1 |--. | mux M2 |--. .--------.
+ | '----------' | '----------' '--| dev D2 |
+ | .--------. | .--------. '--------'
+ '--| dev D4 | '--| dev D3 |
+ '--------' '--------'
+
+When device D1 is accessed, accesses to D2 and D3 are locked out
+for the full duration of the operation (M1 locks child muxes on the
+root adapter). But accesses to D4 are possibly interleaved at any
+point.
+
+This kind of topology is generally not suitable and should probably
+be avoided. The reason is that M2 probably assumes that there will
+be no i2c transfers during its calls to ->select and ->deselect, and
+if there are, any such transfers might appear on the slave side of M2
+as partial i2c transfers, i.e. garbage or worse. This might cause
+device lockups and/or other problems.
+
+The topology is especially troublesome if M2 is an auto-closing
+mux. In that case, any interleaved accesses to D4 might close M2
+prematurely, as might any i2c-transfers part of M1->select.
+
+But if M2 is not making the above stated assumption, and if M2 is not
+auto-closing, the topology is fine.
+
+
+Parent-locked mux as parent of mux-locked mux
+---------------------------------------------
+
+This is a good topology.
+
+ .----------. .----------. .--------.
+ .--------. | parent- |-----| mux- |-----| dev D1 |
+ | root |--+--| locked | | locked | '--------'
+ '--------' | | mux M1 |--. | mux M2 |--. .--------.
+ | '----------' | '----------' '--| dev D2 |
+ | .--------. | .--------. '--------'
+ '--| dev D4 | '--| dev D3 |
+ '--------' '--------'
+
+When D1 is accessed, accesses to D2 are locked out for the full
+duration of the operation (muxes on the top child adapter of M1
+are locked). Accesses to D3 and D4 are possibly interleaved at
+any point, just as is expected for mux-locked muxes.
+
+When D3 or D4 are accessed, everything else is locked out. For D3
+accesses, M1 locks the root adapter. For D4 accesses, the root
+adapter is locked directly.
+
+
+Two mux-locked sibling muxes
+----------------------------
+
+This is a good topology.
+
+ .--------.
+ .----------. .--| dev D1 |
+ | mux- |--' '--------'
+ .--| locked | .--------.
+ | | mux M1 |-----| dev D2 |
+ | '----------' '--------'
+ | .----------. .--------.
+ .--------. | | mux- |-----| dev D3 |
+ | root |--+--| locked | '--------'
+ '--------' | | mux M2 |--. .--------.
+ | '----------' '--| dev D4 |
+ | .--------. '--------'
+ '--| dev D5 |
+ '--------'
+
+When D1 is accessed, accesses to D2, D3 and D4 are locked out. But
+accesses to D5 may be interleaved at any time.
+
+
+Two parent-locked sibling muxes
+-------------------------------
+
+This is a good topology.
+
+ .--------.
+ .----------. .--| dev D1 |
+ | parent- |--' '--------'
+ .--| locked | .--------.
+ | | mux M1 |-----| dev D2 |
+ | '----------' '--------'
+ | .----------. .--------.
+ .--------. | | parent- |-----| dev D3 |
+ | root |--+--| locked | '--------'
+ '--------' | | mux M2 |--. .--------.
+ | '----------' '--| dev D4 |
+ | .--------. '--------'
+ '--| dev D5 |
+ '--------'
+
+When any device is accessed, accesses to all other devices are locked
+out.
+
+
+Mux-locked and parent-locked sibling muxes
+------------------------------------------
+
+This is a good topology.
+
+ .--------.
+ .----------. .--| dev D1 |
+ | mux- |--' '--------'
+ .--| locked | .--------.
+ | | mux M1 |-----| dev D2 |
+ | '----------' '--------'
+ | .----------. .--------.
+ .--------. | | parent- |-----| dev D3 |
+ | root |--+--| locked | '--------'
+ '--------' | | mux M2 |--. .--------.
+ | '----------' '--| dev D4 |
+ | .--------. '--------'
+ '--| dev D5 |
+ '--------'
+
+When D1 or D2 are accessed, accesses to D3 and D4 are locked out while
+accesses to D5 may interleave. When D3 or D4 are accessed, accesses to
+all other devices are locked out.
diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt
index 3f0f5ce3338b..36ea940e5bb9 100644
--- a/Documentation/input/event-codes.txt
+++ b/Documentation/input/event-codes.txt
@@ -173,6 +173,10 @@ A few EV_ABS codes have special meanings:
proximity of the device and while the value of the BTN_TOUCH code is 0. If
the input device may be used freely in three dimensions, consider ABS_Z
instead.
+ - BTN_TOOL_<name> should be set to 1 when the tool comes into detectable
+ proximity and set to 0 when the tool leaves detectable proximity.
+ BTN_TOOL_<name> signals the type of tool that is currently detected by the
+ hardware and is otherwise independent of ABS_DISTANCE and/or BTN_TOUCH.
* ABS_MT_<name>:
- Used to describe multitouch input events. Please see
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ecc74fa4bfde..f5c35901144c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -131,6 +131,7 @@ parameter is applicable:
More X86-64 boot options can be found in
Documentation/x86/x86_64/boot-options.txt .
X86 Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
+ X86_UV SGI UV support is enabled.
XEN Xen support is enabled
In addition, the following text indicates that the option:
@@ -167,16 +168,18 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
acpi= [HW,ACPI,X86,ARM64]
Advanced Configuration and Power Interface
- Format: { force | off | strict | noirq | rsdt |
+ Format: { force | on | off | strict | noirq | rsdt |
copy_dsdt }
force -- enable ACPI if default was off
+ on -- enable ACPI but allow fallback to DT [arm64]
off -- disable ACPI if default was on
noirq -- do not use ACPI for IRQ routing
strict -- Be less tolerant of platforms that are not
strictly ACPI specification compliant.
rsdt -- prefer RSDT over (default) XSDT
copy_dsdt -- copy DSDT to memory
- For ARM64, ONLY "acpi=off" or "acpi=force" are available
+ For ARM64, ONLY "acpi=off", "acpi=on" or "acpi=force"
+ are available
See also Documentation/power/runtime_pm.txt, pci=noacpi
@@ -312,6 +315,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
acpi_osi=!* # remove all strings
acpi_osi=! # disable all built-in OS vendor
strings
+ acpi_osi=!! # enable all built-in OS vendor
+ strings
acpi_osi= # disable all strings
'acpi_osi=!' can be used in combination with single or
@@ -542,6 +547,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Format: <int> (must be >=0)
Default: 64
+ bau= [X86_UV] Enable the BAU on SGI UV. The default
+ behavior is to disable the BAU (i.e. bau=0).
+ Format: { "0" | "1" }
+ 0 - Disable the BAU.
+ 1 - Enable the BAU.
+ unset - Disable the BAU.
+
baycom_epp= [HW,AX25]
Format: <io>,<mode>
@@ -826,6 +838,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
It will be ignored when crashkernel=X,high is not used
or memory reserved is below 4G.
+ cryptomgr.notests
+ [KNL] Disable crypto self-tests
+
cs89x0_dma= [HW,NET]
Format: <dma>
@@ -1661,6 +1676,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
hwp_only
Only load intel_pstate on systems which support
hardware P state control (HWP) if available.
+ support_acpi_ppc
+ Enforce ACPI _PPC performance limits. If the Fixed ACPI
+ Description Table specifies preferred power management
+ profile as "Enterprise Server" or "Performance Server",
+ then this feature is turned on by default.
intremap= [X86-64, Intel-IOMMU]
on enable Interrupt Remapping (default)
@@ -1767,6 +1787,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
PCI device 00:14.0 write the parameter as:
ivrs_hpet[0]=00:14.0
+ ivrs_acpihid [HW,X86_64]
+ Provide an override to the ACPI-HID:UID<->DEVICE-ID
+ mapping provided in the IVRS ACPI table. For
+ example, to map UART-HID:UID AMD0020:0 to
+ PCI device 00:14.5 write the parameter as:
+ ivrs_acpihid[00:14.5]=AMD0020:0
+
js= [HW,JOY] Analog joystick
See Documentation/input/joystick.txt.
@@ -2538,6 +2565,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
+ nosmt [KNL,S390] Disable symmetric multithreading (SMT).
+ Equivalent to smt=1.
+
noxsave [BUGS=X86] Disables x86 extended register state save
and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state.
@@ -3284,6 +3314,44 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Lazy RCU callbacks are those which RCU can
prove do nothing more than free memory.
+ rcuperf.gp_exp= [KNL]
+ Measure performance of expedited synchronous
+ grace-period primitives.
+
+ rcuperf.holdoff= [KNL]
+ Set test-start holdoff period. The purpose of
+ this parameter is to delay the start of the
+ test until boot completes in order to avoid
+ interference.
+
+ rcuperf.nreaders= [KNL]
+ Set number of RCU readers. The value -1 selects
+ N, where N is the number of CPUs. A value
+ "n" less than -1 selects N-n+1, where N is again
+ the number of CPUs. For example, -2 selects N
+ (the number of CPUs), -3 selects N+1, and so on.
+ A value of "n" less than or equal to -N selects
+ a single reader.
+
+ rcuperf.nwriters= [KNL]
+ Set number of RCU writers. The values operate
+ the same as for rcuperf.nreaders: -1 selects
+ N, where N is the number of CPUs.
+
+ rcuperf.perf_runnable= [BOOT]
+ Start rcuperf running at boot time.
+
+ rcuperf.shutdown= [KNL]
+ Shut the system down after performance tests
+ complete. This is useful for hands-off automated
+ testing.
+
+ rcuperf.perf_type= [KNL]
+ Specify the RCU implementation to test.
+
+ rcuperf.verbose= [KNL]
+ Enable additional printk() statements.
+
rcutorture.cbflood_inter_holdoff= [KNL]
Set holdoff time (jiffies) between successive
callback-flood tests.
@@ -3695,6 +3763,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1: Fast pin select (default)
2: ATC IRMode
+ smt [KNL,S390] Set the maximum number of threads (logical
+ CPUs) to use per physical CPU on systems capable of
+ symmetric multithreading (SMT). Will be capped to the
+ actual hardware limit.
+ Format: <integer>
+ Default: -1 (no limit)
+
softlockup_panic=
[KNL] Should the soft-lockup detector generate panics.
Format: <integer>
@@ -4077,6 +4152,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
sector if the number is odd);
i = IGNORE_DEVICE (don't bind to this
device);
+ j = NO_REPORT_LUNS (don't use report luns
+ command, uas only);
l = NOT_LOCKABLE (don't try to lock and
unlock ejectable media);
m = MAX_SECTORS_64 (don't transfer more
diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt
new file mode 100644
index 000000000000..6c43f6ebee8d
--- /dev/null
+++ b/Documentation/livepatch/livepatch.txt
@@ -0,0 +1,394 @@
+=========
+Livepatch
+=========
+
+This document outlines basic information about kernel livepatching.
+
+Table of Contents:
+
+1. Motivation
+2. Kprobes, Ftrace, Livepatching
+3. Consistency model
+4. Livepatch module
+ 4.1. New functions
+ 4.2. Metadata
+ 4.3. Livepatch module handling
+5. Livepatch life-cycle
+ 5.1. Registration
+ 5.2. Enabling
+ 5.3. Disabling
+ 5.4. Unregistration
+6. Sysfs
+7. Limitations
+
+
+1. Motivation
+=============
+
+There are many situations where users are reluctant to reboot a system. It may
+be because their system is performing complex scientific computations or under
+heavy load during peak usage. In addition to keeping systems up and running,
+users want to also have a stable and secure system. Livepatching gives users
+both by allowing for function calls to be redirected; thus, fixing critical
+functions without a system reboot.
+
+
+2. Kprobes, Ftrace, Livepatching
+================================
+
+There are multiple mechanisms in the Linux kernel that are directly related
+to redirection of code execution; namely: kernel probes, function tracing,
+and livepatching:
+
+ + The kernel probes are the most generic. The code can be redirected by
+ putting a breakpoint instruction instead of any instruction.
+
+ + The function tracer calls the code from a predefined location that is
+ close to the function entry point. This location is generated by the
+ compiler using the '-pg' gcc option.
+
+ + Livepatching typically needs to redirect the code at the very beginning
+ of the function entry before the function parameters or the stack
+ are in any way modified.
+
+All three approaches need to modify the existing code at runtime. Therefore
+they need to be aware of each other and not step on each other's toes.
+Most of these problems are solved by using the dynamic ftrace framework as
+a base. A Kprobe is registered as a ftrace handler when the function entry
+is probed, see CONFIG_KPROBES_ON_FTRACE. Also an alternative function from
+a live patch is called with the help of a custom ftrace handler. But there are
+some limitations, see below.
+
+
+3. Consistency model
+====================
+
+Functions are there for a reason. They take some input parameters, get or
+release locks, read, process, and even write some data in a defined way,
+have return values. In other words, each function has a defined semantic.
+
+Many fixes do not change the semantic of the modified functions. For
+example, they add a NULL pointer or a boundary check, fix a race by adding
+a missing memory barrier, or add some locking around a critical section.
+Most of these changes are self contained and the function presents itself
+the same way to the rest of the system. In this case, the functions might
+be updated independently one by one.
+
+But there are more complex fixes. For example, a patch might change
+ordering of locking in multiple functions at the same time. Or a patch
+might exchange meaning of some temporary structures and update
+all the relevant functions. In this case, the affected unit
+(thread, whole kernel) needs to start using all new versions of
+the functions at the same time. Also the switch must happen only
+when it is safe to do so, e.g. when the affected locks are released
+or no data are stored in the modified structures at the moment.
+
+The theory about how to apply functions in a safe way is rather complex.
+The aim is to define a so-called consistency model. It attempts to define
+conditions when the new implementation could be used so that the system
+stays consistent. The theory is not yet finished. See the discussion at
+http://thread.gmane.org/gmane.linux.kernel/1823033/focus=1828189
+
+The current consistency model is very simple. It guarantees that either
+the old or the new function is called. But various functions get redirected
+one by one without any synchronization.
+
+In other words, the current implementation _never_ modifies the behavior
+in the middle of the call. It is because it does _not_ rewrite the entire
+function in the memory. Instead, the function gets redirected at the
+very beginning. But this redirection is used immediately even when
+some other functions from the same patch have not been redirected yet.
+
+See also the section "Limitations" below.
+
+
+4. Livepatch module
+===================
+
+Livepatches are distributed using kernel modules, see
+samples/livepatch/livepatch-sample.c.
+
+The module includes a new implementation of functions that we want
+to replace. In addition, it defines some structures describing the
+relation between the original and the new implementation. Then there
+is code that makes the kernel start using the new code when the livepatch
+module is loaded. Also there is code that cleans up before the
+livepatch module is removed. All this is explained in more detail in
+the next sections.
+
+
+4.1. New functions
+------------------
+
+New versions of functions are typically just copied from the original
+sources. A good practice is to add a prefix to the names so that they
+can be distinguished from the original ones, e.g. in a backtrace. Also
+they can be declared as static because they are not called directly
+and do not need the global visibility.
+
+The patch contains only functions that are really modified. But they
+might want to access functions or data from the original source file
+that may only be locally accessible. This can be solved by a special
+relocation section in the generated livepatch module, see
+Documentation/livepatch/module-elf-format.txt for more details.
+
+
+4.2. Metadata
+-------------
+
+The patch is described by several structures that split the information
+into three levels:
+
+ + struct klp_func is defined for each patched function. It describes
+ the relation between the original and the new implementation of a
+ particular function.
+
+ The structure includes the name, as a string, of the original function.
+ The function address is found via kallsyms at runtime.
+
+ Then it includes the address of the new function. It is defined
+ directly by assigning the function pointer. Note that the new
+ function is typically defined in the same source file.
+
+ As an optional parameter, the symbol position in the kallsyms database can
+ be used to disambiguate functions of the same name. This is not the
+ absolute position in the database, but rather the order it has been found
+ only for a particular object (vmlinux or a kernel module). Note that
+ kallsyms allows for searching symbols according to the object name.
+
+ + struct klp_object defines an array of patched functions (struct
+ klp_func) in the same object, where the object is either vmlinux
+ (NULL) or a module name.
+
+ The structure helps to group and handle functions for each object
+ together. Note that patched modules might be loaded later than
+ the patch itself and the relevant functions might be patched
+ only when they are available.
+
+
+ + struct klp_patch defines an array of patched objects (struct
+ klp_object).
+
+ This structure handles all patched functions consistently and eventually,
+ synchronously. The whole patch is applied only when all patched
+ symbols are found. The only exceptions are symbols from objects
+ (kernel modules) that have not been loaded yet. Also if a more complex
+ consistency model is supported then a selected unit (thread,
+ kernel as a whole) will see the new code from the entire patch
+ only when it is in a safe state.
+
+
+4.3. Livepatch module handling
+------------------------------
+
+The usual behavior is that the new functions will get used when
+the livepatch module is loaded. For this, the module init() function
+has to register the patch (struct klp_patch) and enable it. See the
+section "Livepatch life-cycle" below for more details about these
+two operations.
+
+Module removal is only safe when there are no users of the underlying
+functions. The immediate consistency model is not able to detect this;
+therefore livepatch modules cannot be removed. See "Limitations" below.
+
+5. Livepatch life-cycle
+=======================
+
+Livepatching defines four basic operations that define the life cycle of each
+live patch: registration, enabling, disabling and unregistration. There are
+several reasons why it is done this way.
+
+First, the patch is applied only when all patched symbols for already
+loaded objects are found. The error handling is much easier if this
+check is done before particular functions get redirected.
+
+Second, the immediate consistency model does not guarantee that no one is
+sleeping in the new code after the patch is reverted. This means that the new
+code needs to stay around "forever". If the code is there, one could apply it
+again. Therefore it makes sense to separate the operations that might be done
+once and those that need to be repeated when the patch is enabled (applied)
+again.
+
+Third, it might take some time until the entire system is migrated
+when a more complex consistency model is used. The patch revert might
+block the livepatch module removal for too long. Therefore it is useful
+to revert the patch using a separate operation that might be called
+explicitly. But it does not make sense to remove all information
+until the livepatch module is really removed.
+
+
+5.1. Registration
+-----------------
+
+Each patch first has to be registered using klp_register_patch(). This makes
+the patch known to the livepatch framework. Also it does some preliminary
+computing and checks.
+
+In particular, the patch is added into the list of known patches. The
+addresses of the patched functions are found according to their names.
+The special relocations, mentioned in the section "New functions", are
+applied. The relevant entries are created under
+/sys/kernel/livepatch/<name>. The patch is rejected when any operation
+fails.
+
+
+5.2. Enabling
+-------------
+
+Registered patches might be enabled either by calling klp_enable_patch() or
+by writing '1' to /sys/kernel/livepatch/<name>/enabled. The system will
+start using the new implementation of the patched functions at this stage.
+
+In particular, if an original function is patched for the first time, a
+function specific struct klp_ops is created and a universal ftrace handler
+is registered.
+
+Functions might be patched multiple times. The ftrace handler is registered
+only once for the given function. Further patches just add an entry to the
+list (see field `func_stack`) of the struct klp_ops. The last added
+entry is chosen by the ftrace handler and becomes the active function
+replacement.
+
+Note that the patches might be enabled in a different order than they were
+registered.
+
+
+5.3. Disabling
+--------------
+
+Enabled patches might get disabled either by calling klp_disable_patch() or
+by writing '0' to /sys/kernel/livepatch/<name>/enabled. At this stage
+either the code from the previously enabled patch or even the original
+code gets used.
+
+Here all the functions (struct klp_func) associated with the to-be-disabled
+patch are removed from the corresponding struct klp_ops. The ftrace handler
+is unregistered and the struct klp_ops is freed when the func_stack list
+becomes empty.
+
+Patches must be disabled in exactly the reverse order in which they were
+enabled. It makes the problem and the implementation much easier.
+
+
+5.4. Unregistration
+-------------------
+
+Disabled patches might be unregistered by calling klp_unregister_patch().
+This can be done only when the patch is disabled and the code is no longer
+used. It must be called before the livepatch module gets unloaded.
+
+At this stage, all the relevant sysfs entries are removed and the patch
+is removed from the list of known patches.
+
+
+6. Sysfs
+========
+
+Information about the registered patches can be found under
+/sys/kernel/livepatch. The patches could be enabled and disabled
+by writing there.
+
+See Documentation/ABI/testing/sysfs-kernel-livepatch for more details.
+
+
+7. Limitations
+==============
+
+The current Livepatch implementation has several limitations:
+
+
+ + The patch must not change the semantics of the patched functions.
+
+ The current implementation guarantees only that either the old
+ or the new function is called. The functions are patched one
+ by one. It means that the patch must _not_ change the semantics
+ of the function.
+
+
+ + Data structures can not be patched.
+
+ There is no support to version data structures or anyhow migrate
+ one structure into another. Also the simple consistency model does
+ not allow switching multiple functions atomically.
+
+ Once there is a more complex consistency model, it will be possible to
+ use some workarounds. For example, it will be possible to use a hole
+ for a new member because the data structure is aligned. Or it will
+ be possible to use an existing member for something else.
+
+ There are no plans to add more generic support for modified structures
+ at the moment.
+
+
+ + Only functions that can be traced could be patched.
+
+ Livepatch is based on the dynamic ftrace. In particular, functions
+ implementing ftrace or the livepatch ftrace handler could not be
+ patched. Otherwise, the code would end up in an infinite loop. A
+ potential mistake is prevented by marking the problematic functions
+ by "notrace".
+
+
+ + Anything inlined into __schedule() can not be patched.
+
+ The switch_to macro is inlined into __schedule(). It switches the
+ context between two processes in the middle of the macro. It does
+ not save RIP in x86_64 version (contrary to 32-bit version). Instead,
+ the currently used __schedule()/switch_to() handles both processes.
+
+ Now, let's have two different tasks. One calls the original
+ __schedule(), its registers are stored in a defined order and it
+ goes to sleep in the switch_to macro and some other task is restored
+ using the original __schedule(). Then there is the second task which
+ calls patched__schedule(), it goes to sleep there and the first task
+ is picked by the patched__schedule(). Its RSP is restored and now
+ the registers should be restored as well. But the order is different
+ in the new patched__schedule(), so...
+
+ There is work in progress to remove this limitation.
+
+
+ + Livepatch modules can not be removed.
+
+ The current implementation just redirects the functions at the very
+ beginning. It does not check if the functions are in use. In other
+ words, it knows when the functions get called but it does not
+ know when the functions return. Therefore it can not decide when
+ the livepatch module can be safely removed.
+
+ This will get most likely solved once a more complex consistency model
+ is supported. The idea is that a safe state for patching should also
+ mean a safe state for removing the patch.
+
+ Note that the patch itself might get disabled by writing zero
+ to /sys/kernel/livepatch/<patch>/enabled. As a result, the new
+ code will no longer get called. But it does not guarantee
+ that no task is still sleeping somewhere in the new code.
+
+
+ + Livepatch works reliably only when the dynamic ftrace is located at
+ the very beginning of the function.
+
+ The function needs to be redirected before the stack or the function
+ parameters are modified in any way. For example, livepatch requires
+ using -fentry gcc compiler option on x86_64.
+
+ One exception is the PPC port. It uses relative addressing and TOC.
+ Each function has to handle TOC and save LR before it could call
+ the ftrace handler. This operation has to be reverted on return.
+ Fortunately, the generic ftrace code has the same problem and all
+ this is handled on the ftrace level.
+
+
+ + Kretprobes using the ftrace framework conflict with the patched
+ functions.
+
+ Both kretprobes and livepatches use a ftrace handler that modifies
+ the return address. The first user wins. Either the probe or the patch
+ is rejected when the handler is already in use by the other.
+
+
+ + Kprobes in the original function are ignored when the code is
+ redirected to the new implementation.
+
+ There is work in progress to add warnings about this situation.
diff --git a/Documentation/livepatch/module-elf-format.txt b/Documentation/livepatch/module-elf-format.txt
new file mode 100644
index 000000000000..eedbdcf8ba50
--- /dev/null
+++ b/Documentation/livepatch/module-elf-format.txt
@@ -0,0 +1,311 @@
+===========================
+Livepatch module Elf format
+===========================
+
+This document outlines the Elf format requirements that livepatch modules must follow.
+
+-----------------
+Table of Contents
+-----------------
+0. Background and motivation
+1. Livepatch modinfo field
+2. Livepatch relocation sections
+ 2.1 What are livepatch relocation sections?
+ 2.2 Livepatch relocation section format
+ 2.2.1 Required flags
+ 2.2.2 Required name format
+ 2.2.3 Example livepatch relocation section names
+ 2.2.4 Example `readelf --sections` output
+ 2.2.5 Example `readelf --relocs` output
+3. Livepatch symbols
+ 3.1 What are livepatch symbols?
+ 3.2 A livepatch module's symbol table
+ 3.3 Livepatch symbol format
+ 3.3.1 Required flags
+ 3.3.2 Required name format
+ 3.3.3 Example livepatch symbol names
+ 3.3.4 Example `readelf --symbols` output
+4. Symbol table and Elf section access
+
+----------------------------
+0. Background and motivation
+----------------------------
+
+Formerly, livepatch required separate architecture-specific code to write
+relocations. However, arch-specific code to write relocations already
+exists in the module loader, so this former approach produced redundant
+code. So, instead of duplicating code and re-implementing what the module
+loader can already do, livepatch leverages existing code in the module
+loader to perform all the arch-specific relocation work. Specifically,
+livepatch reuses the apply_relocate_add() function in the module loader to
+write relocations. The patch module Elf format described in this document
+enables livepatch to be able to do this. The hope is that this will make
+livepatch more easily portable to other architectures and reduce the amount
+of arch-specific code required to port livepatch to a particular
+architecture.
+
+Since apply_relocate_add() requires access to a module's section header
+table, symbol table, and relocation section indices, Elf information is
+preserved for livepatch modules (see section 4). Livepatch manages its own
+relocation sections and symbols, which are described in this document. The
+Elf constants used to mark livepatch symbols and relocation sections were
+selected from OS-specific ranges according to the definitions from glibc.
+
+0.1 Why does livepatch need to write its own relocations?
+---------------------------------------------------------
+A typical livepatch module contains patched versions of functions that can
+reference non-exported global symbols and non-included local symbols.
+Relocations referencing these types of symbols cannot be left in as-is
+since the kernel module loader cannot resolve them and will therefore
+reject the livepatch module. Furthermore, we cannot apply relocations that
+affect modules not yet loaded at patch module load time (e.g. a patch to a
+driver that is not loaded). Formerly, livepatch solved this problem by
+embedding special "dynrela" (dynamic rela) sections in the resulting patch
+module Elf output. Using these dynrela sections, livepatch could resolve
+symbols while taking into account its scope and what module the symbol
+belongs to, and then manually apply the dynamic relocations. However this
+approach required livepatch to supply arch-specific code in order to write
+these relocations. In the new format, livepatch manages its own SHT_RELA
+relocation sections in place of dynrela sections, and the symbols that the
+relas reference are special livepatch symbols (see section 2 and 3). The
+arch-specific livepatch relocation code is replaced by a call to
+apply_relocate_add().
+
+================================
+PATCH MODULE FORMAT REQUIREMENTS
+================================
+
+--------------------------
+1. Livepatch modinfo field
+--------------------------
+
+Livepatch modules are required to have the "livepatch" modinfo attribute.
+See the sample livepatch module in samples/livepatch/ for how this is done.
+
+Livepatch modules can be identified by users by using the 'modinfo' command
+and looking for the presence of the "livepatch" field. This field is also
+used by the kernel module loader to identify livepatch modules.
+
+Example modinfo output:
+-----------------------
+% modinfo livepatch-meminfo.ko
+filename: livepatch-meminfo.ko
+livepatch: Y
+license: GPL
+depends:
+vermagic: 4.3.0+ SMP mod_unload
+
+--------------------------------
+2. Livepatch relocation sections
+--------------------------------
+
+-------------------------------------------
+2.1 What are livepatch relocation sections?
+-------------------------------------------
+A livepatch module manages its own Elf relocation sections to apply
+relocations to modules as well as to the kernel (vmlinux) at the
+appropriate time. For example, if a patch module patches a driver that is
+not currently loaded, livepatch will apply the corresponding livepatch
+relocation section(s) to the driver once it loads.
+
+Each "object" (e.g. vmlinux, or a module) within a patch module may have
+multiple livepatch relocation sections associated with it (e.g. patches to
+multiple functions within the same object). There is a 1-1 correspondence
+between a livepatch relocation section and the target section (usually the
+text section of a function) to which the relocation(s) apply. It is
+also possible for a livepatch module to have no livepatch relocation
+sections, as in the case of the sample livepatch module (see
+samples/livepatch).
+
+Since Elf information is preserved for livepatch modules (see Section 4), a
+livepatch relocation section can be applied simply by passing in the
+appropriate section index to apply_relocate_add(), which then uses it to
+access the relocation section and apply the relocations.
+
+Every symbol referenced by a rela in a livepatch relocation section is a
+livepatch symbol. These must be resolved before livepatch can call
+apply_relocate_add(). See Section 3 for more information.
+
+---------------------------------------
+2.2 Livepatch relocation section format
+---------------------------------------
+
+2.2.1 Required flags
+--------------------
+Livepatch relocation sections must be marked with the SHF_RELA_LIVEPATCH
+section flag. See include/uapi/linux/elf.h for the definition. The module
+loader recognizes this flag and will avoid applying those relocation sections
+at patch module load time. These sections must also be marked with SHF_ALLOC,
+so that the module loader doesn't discard them on module load (i.e. they will
+be copied into memory along with the other SHF_ALLOC sections).
+
+2.2.2 Required name format
+--------------------------
+The name of a livepatch relocation section must conform to the following format:
+
+.klp.rela.objname.section_name
+^ ^^ ^ ^ ^
+|________||_____| |__________|
+ [A] [B] [C]
+
+[A] The relocation section name is prefixed with the string ".klp.rela."
+[B] The name of the object (i.e. "vmlinux" or name of module) to
+ which the relocation section belongs follows immediately after the prefix.
+[C] The actual name of the section to which this relocation section applies.
+
+2.2.3 Example livepatch relocation section names:
+-------------------------------------------------
+.klp.rela.ext4.text.ext4_attr_store
+.klp.rela.vmlinux.text.cmdline_proc_show
+
+2.2.4 Example `readelf --sections` output for a patch
+module that patches vmlinux and modules 9p, btrfs, ext4:
+--------------------------------------------------------
+ Section Headers:
+ [Nr] Name Type Address Off Size ES Flg Lk Inf Al
+ [ snip ]
+ [29] .klp.rela.9p.text.caches.show RELA 0000000000000000 002d58 0000c0 18 AIo 64 9 8
+ [30] .klp.rela.btrfs.text.btrfs.feature.attr.show RELA 0000000000000000 002e18 000060 18 AIo 64 11 8
+ [ snip ]
+ [34] .klp.rela.ext4.text.ext4.attr.store RELA 0000000000000000 002fd8 0000d8 18 AIo 64 13 8
+ [35] .klp.rela.ext4.text.ext4.attr.show RELA 0000000000000000 0030b0 000150 18 AIo 64 15 8
+ [36] .klp.rela.vmlinux.text.cmdline.proc.show RELA 0000000000000000 003200 000018 18 AIo 64 17 8
+ [37] .klp.rela.vmlinux.text.meminfo.proc.show RELA 0000000000000000 003218 0000f0 18 AIo 64 19 8
+ [ snip ] ^ ^
+ | |
+ [*] [*]
+[*] Livepatch relocation sections are SHT_RELA sections but with a few special
+characteristics. Notice that they are marked SHF_ALLOC ("A") so that they will
+not be discarded when the module is loaded into memory, as well as with the
+SHF_RELA_LIVEPATCH flag ("o" - for OS-specific).
+
+2.2.5 Example `readelf --relocs` output for a patch module:
+-----------------------------------------------------------
+Relocation section '.klp.rela.btrfs.text.btrfs_feature_attr_show' at offset 0x2ba0 contains 4 entries:
+ Offset Info Type Symbol's Value Symbol's Name + Addend
+000000000000001f 0000005e00000002 R_X86_64_PC32 0000000000000000 .klp.sym.vmlinux.printk,0 - 4
+0000000000000028 0000003d0000000b R_X86_64_32S 0000000000000000 .klp.sym.btrfs.btrfs_ktype,0 + 0
+0000000000000036 0000003b00000002 R_X86_64_PC32 0000000000000000 .klp.sym.btrfs.can_modify_feature.isra.3,0 - 4
+000000000000004c 0000004900000002 R_X86_64_PC32 0000000000000000 .klp.sym.vmlinux.snprintf,0 - 4
+[ snip ] ^
+ |
+ [*]
+[*] Every symbol referenced by a relocation is a livepatch symbol.
+
+--------------------
+3. Livepatch symbols
+--------------------
+
+-------------------------------
+3.1 What are livepatch symbols?
+-------------------------------
+Livepatch symbols are symbols referred to by livepatch relocation sections.
+These are symbols accessed from new versions of functions for patched
+objects, whose addresses cannot be resolved by the module loader (because
+they are local or unexported global syms). Since the module loader only
+resolves exported syms, and not every symbol referenced by the new patched
+functions is exported, livepatch symbols were introduced. They are used
+also in cases where we cannot immediately know the address of a symbol when
+a patch module loads. For example, this is the case when livepatch patches
+a module that is not loaded yet. In this case, the relevant livepatch
+symbols are resolved simply when the target module loads. In any case, for
+any livepatch relocation section, all livepatch symbols referenced by that
+section must be resolved before livepatch can call apply_relocate_add() for
+that reloc section.
+
+Livepatch symbols must be marked with SHN_LIVEPATCH so that the module
+loader can identify and ignore them. Livepatch modules keep these symbols
+in their symbol tables, and the symbol table is made accessible through
+module->symtab.
+
+-------------------------------------
+3.2 A livepatch module's symbol table
+-------------------------------------
+Normally, a stripped down copy of a module's symbol table (containing only
+"core" symbols) is made available through module->symtab (See layout_symtab()
+in kernel/module.c). For livepatch modules, the symbol table copied into memory
+on module load must be exactly the same as the symbol table produced when the
+patch module was compiled. This is because the relocations in each livepatch
+relocation section refer to their respective symbols with their symbol indices,
+and the original symbol indices (and thus the symtab ordering) must be
+preserved in order for apply_relocate_add() to find the right symbol.
+
+For example, take this particular rela from a livepatch module:
+Relocation section '.klp.rela.btrfs.text.btrfs_feature_attr_show' at offset 0x2ba0 contains 4 entries:
+ Offset Info Type Symbol's Value Symbol's Name + Addend
+000000000000001f 0000005e00000002 R_X86_64_PC32 0000000000000000 .klp.sym.vmlinux.printk,0 - 4
+
+This rela refers to the symbol '.klp.sym.vmlinux.printk,0', and the symbol index is encoded
+in the upper 32 bits of 'Info'. Here the symbol index is 0x5e, which is 94 in
+decimal.
+And in this patch module's corresponding symbol table, symbol index 94 refers to that very symbol:
+[ snip ]
+94: 0000000000000000 0 NOTYPE GLOBAL DEFAULT OS [0xff20] .klp.sym.vmlinux.printk,0
+[ snip ]
+
+---------------------------
+3.3 Livepatch symbol format
+---------------------------
+
+3.3.1 Required flags
+--------------------
+Livepatch symbols must have their section index marked as SHN_LIVEPATCH, so
+that the module loader can identify them and not attempt to resolve them.
+See include/uapi/linux/elf.h for the actual definitions.
+
+3.3.2 Required name format
+--------------------------
+Livepatch symbol names must conform to the following format:
+
+.klp.sym.objname.symbol_name,sympos
+^ ^^ ^ ^ ^ ^
+|_______||_____| |_________| |
+ [A] [B] [C] [D]
+
+[A] The symbol name is prefixed with the string ".klp.sym."
+[B] The name of the object (i.e. "vmlinux" or name of module) to
+ which the symbol belongs follows immediately after the prefix.
+[C] The actual name of the symbol.
+[D] The position of the symbol in the object (according to kallsyms).
+ This is used to differentiate duplicate symbols within the same
+ object. The symbol position is expressed numerically (0, 1, 2...).
+ The symbol position of a unique symbol is 0.
+
+3.3.3 Example livepatch symbol names:
+-------------------------------------
+.klp.sym.vmlinux.snprintf,0
+.klp.sym.vmlinux.printk,0
+.klp.sym.btrfs.btrfs_ktype,0
+
+3.3.4 Example `readelf --symbols` output for a patch module:
+------------------------------------------------------------
+Symbol table '.symtab' contains 127 entries:
+ Num: Value Size Type Bind Vis Ndx Name
+ [ snip ]
+ 73: 0000000000000000 0 NOTYPE GLOBAL DEFAULT OS [0xff20] .klp.sym.vmlinux.snprintf,0
+ 74: 0000000000000000 0 NOTYPE GLOBAL DEFAULT OS [0xff20] .klp.sym.vmlinux.capable,0
+ 75: 0000000000000000 0 NOTYPE GLOBAL DEFAULT OS [0xff20] .klp.sym.vmlinux.find_next_bit,0
+ 76: 0000000000000000 0 NOTYPE GLOBAL DEFAULT OS [0xff20] .klp.sym.vmlinux.si_swapinfo,0
+ [ snip ] ^
+ |
+ [*]
+[*] Note that the 'Ndx' (Section index) for these symbols is SHN_LIVEPATCH (0xff20).
+ "OS" means OS-specific.
+
+--------------------------------------
+4. Symbol table and Elf section access
+--------------------------------------
+A livepatch module's symbol table is accessible through module->symtab.
+
+Since apply_relocate_add() requires access to a module's section headers,
+symbol table, and relocation section indices, Elf information is preserved for
+livepatch modules and is made accessible by the module loader through
+module->klp_info, which is a klp_modinfo struct. When a livepatch module loads,
+this struct is filled in by the module loader. Its fields are documented below:
+
+struct klp_modinfo {
+ Elf_Ehdr hdr; /* Elf header */
+ Elf_Shdr *sechdrs; /* Section header table */
+ char *secstrings; /* String table for the section headers */
+ unsigned int symndx; /* The symbol table section index */
+};
diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt
index 5001280e9d82..9de1c158d44c 100644
--- a/Documentation/locking/lockdep-design.txt
+++ b/Documentation/locking/lockdep-design.txt
@@ -97,7 +97,7 @@ between any two lock-classes:
<hardirq-safe> -> <hardirq-unsafe>
<softirq-safe> -> <softirq-unsafe>
-The first rule comes from the fact the a hardirq-safe lock could be
+The first rule comes from the fact that a hardirq-safe lock could be
taken by a hardirq context, interrupting a hardirq-unsafe lock - and
thus could result in a lock inversion deadlock. Likewise, a softirq-safe
lock could be taken by an softirq context, interrupting a softirq-unsafe
@@ -220,7 +220,7 @@ calculated, which hash is unique for every lock chain. The hash value,
when the chain is validated for the first time, is then put into a hash
table, which hash-table can be checked in a lockfree manner. If the
locking chain occurs again later on, the hash table tells us that we
-dont have to validate the chain again.
+don't have to validate the chain again.
Troubleshooting:
----------------
diff --git a/Documentation/md-cluster.txt b/Documentation/md-cluster.txt
index c100c7163507..38883276d31c 100644
--- a/Documentation/md-cluster.txt
+++ b/Documentation/md-cluster.txt
@@ -316,3 +316,9 @@ The algorithm is:
nodes are using the raid which is achieved by lock all bitmap
locks within the cluster, and also those locks are unlocked
accordingly.
+
+7. Unsupported features
+
+There are some things which are not supported by cluster MD yet.
+
+- update size and change array_sectors.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 3729cbe60e41..147ae8ec836f 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -4,8 +4,40 @@
By: David Howells <dhowells@redhat.com>
Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ Will Deacon <will.deacon@arm.com>
+ Peter Zijlstra <peterz@infradead.org>
-Contents:
+==========
+DISCLAIMER
+==========
+
+This document is not a specification; it is intentionally (for the sake of
+brevity) and unintentionally (due to being human) incomplete. This document is
+meant as a guide to using the various memory barriers provided by Linux, but
+in case of any doubt (and there are many) please ask.
+
+To repeat, this document is not a specification of what Linux expects from
+hardware.
+
+The purpose of this document is twofold:
+
+ (1) to specify the minimum functionality that one can rely on for any
+ particular barrier, and
+
+ (2) to provide a guide as to how to use the barriers that are available.
+
+Note that an architecture can provide more than the minimum requirement
+for any particular barrier, but if the architecture provides less than
+that, that architecture is incorrect.
+
+Note also that it is possible that a barrier may be a no-op for an
+architecture because the way that arch works renders an explicit barrier
+unnecessary in that case.
+
+
+========
+CONTENTS
+========
(*) Abstract memory access model.
@@ -31,15 +63,15 @@ Contents:
(*) Implicit kernel memory barriers.
- - Locking functions.
+ - Lock acquisition functions.
- Interrupt disabling functions.
- Sleep and wake-up functions.
- Miscellaneous functions.
- (*) Inter-CPU locking barrier effects.
+ (*) Inter-CPU acquiring barrier effects.
- - Locks vs memory accesses.
- - Locks vs I/O accesses.
+ - Acquires vs memory accesses.
+ - Acquires vs I/O accesses.
(*) Where are memory barriers needed?
@@ -61,6 +93,7 @@ Contents:
(*) The things CPUs get up to.
- And then there's the Alpha.
+ - Virtual Machine Guests.
(*) Example uses.
@@ -148,7 +181,7 @@ As a further example, consider this sequence of events:
CPU 1 CPU 2
=============== ===============
- { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ { A == 1, B == 2, C == 3, P == &A, Q == &C }
B = 4; Q = P;
P = &B D = *Q;
@@ -430,8 +463,9 @@ And a couple of implicit varieties:
This acts as a one-way permeable barrier. It guarantees that all memory
operations after the ACQUIRE operation will appear to happen after the
ACQUIRE operation with respect to the other components of the system.
- ACQUIRE operations include LOCK operations and smp_load_acquire()
- operations.
+ ACQUIRE operations include LOCK operations and both smp_load_acquire()
+ and smp_cond_acquire() operations. The latter builds the necessary ACQUIRE
+ semantics from relying on a control dependency and smp_rmb().
Memory operations that occur before an ACQUIRE operation may appear to
happen after it completes.
@@ -464,6 +498,11 @@ And a couple of implicit varieties:
This means that ACQUIRE acts as a minimal "acquire" operation and
RELEASE acts as a minimal "release" operation.
+A subset of the atomic operations described in atomic_ops.txt have ACQUIRE
+and RELEASE variants in addition to fully-ordered and relaxed (no barrier
+semantics) definitions. For compound atomics performing both a load and a
+store, ACQUIRE semantics apply only to the load and RELEASE semantics apply
+only to the store portion of the operation.
Memory barriers are only required where there's a possibility of interaction
between two CPUs or between a CPU and a device. If it can be guaranteed that
@@ -517,7 +556,7 @@ following sequence of events:
CPU 1 CPU 2
=============== ===============
- { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ { A == 1, B == 2, C == 3, P == &A, Q == &C }
B = 4;
<write barrier>
WRITE_ONCE(P, &B)
@@ -544,7 +583,7 @@ between the address load and the data load:
CPU 1 CPU 2
=============== ===============
- { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ { A == 1, B == 2, C == 3, P == &A, Q == &C }
B = 4;
<write barrier>
WRITE_ONCE(P, &B);
@@ -813,9 +852,10 @@ In summary:
the same variable, then those stores must be ordered, either by
preceding both of them with smp_mb() or by using smp_store_release()
to carry out the stores. Please note that it is -not- sufficient
- to use barrier() at beginning of each leg of the "if" statement,
- as optimizing compilers do not necessarily respect barrier()
- in this case.
+ to use barrier() at beginning of each leg of the "if" statement
+ because, as shown by the example above, optimizing compilers can
+ destroy the control dependency while respecting the letter of the
+ barrier() law.
(*) Control dependencies require at least one run-time conditional
between the prior load and the subsequent store, and this
@@ -1731,15 +1771,15 @@ The Linux kernel has eight basic CPU memory barriers:
All memory barriers except the data dependency barriers imply a compiler
-barrier. Data dependencies do not impose any additional compiler ordering.
+barrier. Data dependencies do not impose any additional compiler ordering.
Aside: In the case of data dependencies, the compiler would be expected
to issue the loads in the correct order (eg. `a[b]` would have to load
the value of b before loading a[b]), however there is no guarantee in
the C specification that the compiler may not speculate the value of b
(eg. is equal to 1) and load a before b (eg. tmp = a[1]; if (b != 1)
-tmp = a[b]; ). There is also the problem of a compiler reloading b after
-having loaded a[b], thus having a newer copy of b than a[b]. A consensus
+tmp = a[b]; ). There is also the problem of a compiler reloading b after
+having loaded a[b], thus having a newer copy of b than a[b]. A consensus
has not yet been reached about these problems, however the READ_ONCE()
macro is a good place to start looking.
@@ -1794,6 +1834,7 @@ There are some more advanced barrier functions:
(*) lockless_dereference();
+
This can be thought of as a pointer-fetch wrapper around the
smp_read_barrier_depends() data-dependency barrier.
@@ -1858,7 +1899,7 @@ This is a variation on the mandatory write barrier that causes writes to weakly
ordered I/O regions to be partially ordered. Its effects may go beyond the
CPU->Hardware interface and actually affect the hardware at some level.
-See the subsection "Locks vs I/O accesses" for more information.
+See the subsection "Acquires vs I/O accesses" for more information.
===============================
@@ -1873,8 +1914,8 @@ provide more substantial guarantees, but these may not be relied upon outside
of arch specific code.
-ACQUIRING FUNCTIONS
--------------------
+LOCK ACQUISITION FUNCTIONS
+--------------------------
The Linux kernel has a number of locking constructs:
@@ -1895,7 +1936,7 @@ for each construct. These operations all imply certain barriers:
Memory operations issued before the ACQUIRE may be completed after
the ACQUIRE operation has completed. An smp_mb__before_spinlock(),
combined with a following ACQUIRE, orders prior stores against
- subsequent loads and stores. Note that this is weaker than smp_mb()!
+ subsequent loads and stores. Note that this is weaker than smp_mb()!
The smp_mb__before_spinlock() primitive is free on many architectures.
(2) RELEASE operation implication:
@@ -2090,9 +2131,9 @@ or:
event_indicated = 1;
wake_up_process(event_daemon);
-A write memory barrier is implied by wake_up() and co. if and only if they wake
-something up. The barrier occurs before the task state is cleared, and so sits
-between the STORE to indicate the event and the STORE to set TASK_RUNNING:
+A write memory barrier is implied by wake_up() and co. if and only if they
+wake something up. The barrier occurs before the task state is cleared, and so
+sits between the STORE to indicate the event and the STORE to set TASK_RUNNING:
CPU 1 CPU 2
=============================== ===============================
@@ -2206,7 +2247,7 @@ three CPUs; then should the following sequence of events occur:
Then there is no guarantee as to what order CPU 3 will see the accesses to *A
through *H occur in, other than the constraints imposed by the separate locks
-on the separate CPUs. It might, for example, see:
+on the separate CPUs. It might, for example, see:
*E, ACQUIRE M, ACQUIRE Q, *G, *C, *F, *A, *B, RELEASE Q, *D, *H, RELEASE M
@@ -2486,9 +2527,9 @@ The following operations are special locking primitives:
clear_bit_unlock();
__clear_bit_unlock();
-These implement ACQUIRE-class and RELEASE-class operations. These should be used in
-preference to other operations when implementing locking primitives, because
-their implementations can be optimised on many architectures.
+These implement ACQUIRE-class and RELEASE-class operations. These should be
+used in preference to other operations when implementing locking primitives,
+because their implementations can be optimised on many architectures.
[!] Note that special memory barrier primitives are available for these
situations because on some CPUs the atomic instructions used imply full memory
@@ -2568,12 +2609,12 @@ explicit barriers are used.
Normally this won't be a problem because the I/O accesses done inside such
sections will include synchronous load operations on strictly ordered I/O
-registers that form implicit I/O barriers. If this isn't sufficient then an
+registers that form implicit I/O barriers. If this isn't sufficient then an
mmiowb() may need to be used explicitly.
A similar situation may occur between an interrupt routine and two routines
-running on separate CPUs that communicate with each other. If such a case is
+running on separate CPUs that communicate with each other. If such a case is
likely, then interrupt-disabling locks should be used to guarantee ordering.
@@ -2587,8 +2628,8 @@ functions:
(*) inX(), outX():
These are intended to talk to I/O space rather than memory space, but
- that's primarily a CPU-specific concept. The i386 and x86_64 processors do
- indeed have special I/O space access cycles and instructions, but many
+ that's primarily a CPU-specific concept. The i386 and x86_64 processors
+ do indeed have special I/O space access cycles and instructions, but many
CPUs don't have such a concept.
The PCI bus, amongst others, defines an I/O space concept which - on such
@@ -2610,7 +2651,7 @@ functions:
Whether these are guaranteed to be fully ordered and uncombined with
respect to each other on the issuing CPU depends on the characteristics
- defined for the memory window through which they're accessing. On later
+ defined for the memory window through which they're accessing. On later
i386 architecture machines, for example, this is controlled by way of the
MTRR registers.
@@ -2635,10 +2676,10 @@ functions:
(*) readX_relaxed(), writeX_relaxed()
These are similar to readX() and writeX(), but provide weaker memory
- ordering guarantees. Specifically, they do not guarantee ordering with
+ ordering guarantees. Specifically, they do not guarantee ordering with
respect to normal memory accesses (e.g. DMA buffers) nor do they guarantee
- ordering with respect to LOCK or UNLOCK operations. If the latter is
- required, an mmiowb() barrier can be used. Note that relaxed accesses to
+ ordering with respect to LOCK or UNLOCK operations. If the latter is
+ required, an mmiowb() barrier can be used. Note that relaxed accesses to
the same peripheral are guaranteed to be ordered with respect to each
other.
@@ -3040,8 +3081,9 @@ The Alpha defines the Linux kernel's memory barrier model.
See the subsection on "Cache Coherency" above.
+
VIRTUAL MACHINE GUESTS
--------------------
+----------------------
Guests running within virtual machines might be affected by SMP effects even if
the guest itself is compiled without SMP support. This is an artifact of
@@ -3050,7 +3092,7 @@ barriers for this use-case would be possible but is often suboptimal.
To handle this case optimally, low-level virt_mb() etc macros are available.
These have the same effect as smp_mb() etc when SMP is enabled, but generate
-identical code for SMP and non-SMP systems. For example, virtual machine guests
+identical code for SMP and non-SMP systems. For example, virtual machine guests
should use virt_mb() rather than smp_mb() when synchronizing against a
(possibly SMP) host.
@@ -3058,6 +3100,7 @@ These are equivalent to smp_mb() etc counterparts in all other respects,
in particular, they do not control MMIO effects: to control
MMIO effects, use mandatory barriers.
+
============
EXAMPLE USES
============
diff --git a/Documentation/networking/altera_tse.txt b/Documentation/networking/altera_tse.txt
index cd417d7b5bd4..50b8589d12fd 100644
--- a/Documentation/networking/altera_tse.txt
+++ b/Documentation/networking/altera_tse.txt
@@ -65,14 +65,14 @@ Driver parameters can be also passed in command line by using:
4.1) Transmit process
When the driver's transmit routine is called by the kernel, it sets up a
transmit descriptor by calling the underlying DMA transmit routine (SGDMA or
-MSGDMA), and initites a transmit operation. Once the transmit is complete, an
+MSGDMA), and initiates a transmit operation. Once the transmit is complete, an
interrupt is driven by the transmit DMA logic. The driver handles the transmit
completion in the context of the interrupt handling chain by recycling
resource required to send and track the requested transmit operation.
4.2) Receive process
The driver will post receive buffers to the receive DMA logic during driver
-intialization. Receive buffers may or may not be queued depending upon the
+initialization. Receive buffers may or may not be queued depending upon the
underlying DMA logic (MSGDMA is able queue receive buffers, SGDMA is not able
to queue receive buffers to the SGDMA receive logic). When a packet is
received, the DMA logic generates an interrupt. The driver handles a receive
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 334b49ef02d1..57f52cdce32e 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -1880,8 +1880,8 @@ or more peers on the local network.
The ARP monitor relies on the device driver itself to verify
that traffic is flowing. In particular, the driver must keep up to
-date the last receive time, dev->last_rx, and transmit start time,
-dev->trans_start. If these are not updated by the driver, then the
+date the last receive time, dev->last_rx. Drivers that use NETIF_F_LLTX
+flag must also update netdev_queue->trans_start. If they do not, then the
ARP monitor will immediately fail any slaves using that driver, and
those slaves will stay down. If networking monitoring (tcpdump, etc)
shows the ARP requests and replies on the network, then it may be that
diff --git a/Documentation/networking/checksum-offloads.txt b/Documentation/networking/checksum-offloads.txt
index de2a327766a7..56e36861245f 100644
--- a/Documentation/networking/checksum-offloads.txt
+++ b/Documentation/networking/checksum-offloads.txt
@@ -69,18 +69,18 @@ LCO: Local Checksum Offload
LCO is a technique for efficiently computing the outer checksum of an
encapsulated datagram when the inner checksum is due to be offloaded.
The ones-complement sum of a correctly checksummed TCP or UDP packet is
- equal to the sum of the pseudo header, because everything else gets
- 'cancelled out' by the checksum field. This is because the sum was
+ equal to the complement of the sum of the pseudo header, because everything
+ else gets 'cancelled out' by the checksum field. This is because the sum was
complemented before being written to the checksum field.
More generally, this holds in any case where the 'IP-style' ones complement
checksum is used, and thus any checksum that TX Checksum Offload supports.
That is, if we have set up TX Checksum Offload with a start/offset pair, we
- know that _after the device has filled in that checksum_, the ones
+ know that after the device has filled in that checksum, the ones
complement sum from csum_start to the end of the packet will be equal to
- _whatever value we put in the checksum field beforehand_. This allows us
- to compute the outer checksum without looking at the payload: we simply
- stop summing when we get to csum_start, then add the 16-bit word at
- (csum_start + csum_offset).
+ the complement of whatever value we put in the checksum field beforehand.
+ This allows us to compute the outer checksum without looking at the payload:
+ we simply stop summing when we get to csum_start, then add the complement of
+ the 16-bit word at (csum_start + csum_offset).
Then, when the true inner checksum is filled in (either by hardware or by
skb_checksum_help()), the outer checksum will become correct by virtue of
the arithmetic.
diff --git a/Documentation/networking/dsa/bcm_sf2.txt b/Documentation/networking/dsa/bcm_sf2.txt
index d999d0c1c5b8..eba3a2431e91 100644
--- a/Documentation/networking/dsa/bcm_sf2.txt
+++ b/Documentation/networking/dsa/bcm_sf2.txt
@@ -38,7 +38,7 @@ Implementation details
======================
The driver is located in drivers/net/dsa/bcm_sf2.c and is implemented as a DSA
-driver; see Documentation/networking/dsa/dsa.txt for details on the subsytem
+driver; see Documentation/networking/dsa/dsa.txt for details on the subsystem
and what it provides.
The SF2 switch is configured to enable a Broadcom specific 4-bytes switch tag
diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt
index 3b196c304b73..631b0f7ae16f 100644
--- a/Documentation/networking/dsa/dsa.txt
+++ b/Documentation/networking/dsa/dsa.txt
@@ -334,7 +334,7 @@ more specifically with its VLAN filtering portion when configuring VLANs on top
of per-port slave network devices. Since DSA primarily deals with
MDIO-connected switches, although not exclusively, SWITCHDEV's
prepare/abort/commit phases are often simplified into a prepare phase which
-checks whether the operation is supporte by the DSA switch driver, and a commit
+checks whether the operation is supported by the DSA switch driver, and a commit
phase which applies the changes.
As of today, the only SWITCHDEV objects supported by DSA are the FDB and VLAN
@@ -533,7 +533,7 @@ Bridge layer
out at the switch hardware for the switch to (re) learn MAC addresses behind
this port.
-- port_stp_update: bridge layer function invoked when a given switch port STP
+- port_stp_state_set: bridge layer function invoked when a given switch port STP
state is computed by the bridge layer and should be propagated to switch
hardware to forward/block/learn traffic. The switch driver is responsible for
computing a STP state change based on current and asked parameters and perform
@@ -542,6 +542,12 @@ Bridge layer
Bridge VLAN filtering
---------------------
+- port_vlan_prepare: bridge layer function invoked when the bridge prepares the
+ configuration of a VLAN on the given port. If the operation is not supported
+ by the hardware, this function should return -EOPNOTSUPP to inform the bridge
+ code to fall back to a software implementation. No hardware setup must be done
+ in this function. See port_vlan_add for this and details.
+
- port_vlan_add: bridge layer function invoked when a VLAN is configured
(tagged or untagged) for the given switch port
@@ -552,6 +558,12 @@ Bridge VLAN filtering
function that the driver has to call for each VLAN the given port is a member
of. A switchdev object is used to carry the VID and bridge flags.
+- port_fdb_prepare: bridge layer function invoked when the bridge prepares the
+ installation of a Forwarding Database entry. If the operation is not
+ supported, this function should return -EOPNOTSUPP to inform the bridge code
+ to fall back to a software implementation. No hardware setup must be done in
+ this function. See port_fdb_add for this and details.
+
- port_fdb_add: bridge layer function invoked when the bridge wants to install a
Forwarding Database entry, the switch hardware should be programmed with the
specified address in the specified VLAN Id in the forwarding database
@@ -565,6 +577,10 @@ of DSA, would be the its port-based VLAN, used by the associated bridge device.
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database
+- port_fdb_dump: bridge layer function invoked with a switchdev callback
+ function that the driver has to call for each MAC address known to be behind
+ the given port. A switchdev object is used to carry the VID and FDB info.
+
TODO
====
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 11f67f181d39..683ada5ad81d 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -216,14 +216,14 @@ opcodes as defined in linux/filter.h stand for:
jmp 6 Jump to label
ja 6 Jump to label
- jeq 7, 8 Jump on k == A
- jneq 8 Jump on k != A
- jne 8 Jump on k != A
- jlt 8 Jump on k < A
- jle 8 Jump on k <= A
- jgt 7, 8 Jump on k > A
- jge 7, 8 Jump on k >= A
- jset 7, 8 Jump on k & A
+ jeq 7, 8 Jump on A == k
+ jneq 8 Jump on A != k
+ jne 8 Jump on A != k
+ jlt 8 Jump on A < k
+ jle 8 Jump on A <= k
+ jgt 7, 8 Jump on A > k
+ jge 7, 8 Jump on A >= k
+ jset 7, 8 Jump on A & k
add 0, 4 A + <x>
sub 0, 4 A - <x>
@@ -1095,6 +1095,87 @@ all use cases.
See details of eBPF verifier in kernel/bpf/verifier.c
+Direct packet access
+--------------------
+In cls_bpf and act_bpf programs the verifier allows direct access to the packet
+data via skb->data and skb->data_end pointers.
+Ex:
+1: r4 = *(u32 *)(r1 +80) /* load skb->data_end */
+2: r3 = *(u32 *)(r1 +76) /* load skb->data */
+3: r5 = r3
+4: r5 += 14
+5: if r5 > r4 goto pc+16
+R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
+6: r0 = *(u16 *)(r3 +12) /* access 12 and 13 bytes of the packet */
+
+this 2byte load from the packet is safe to do, since the program author
+did check 'if (skb->data + 14 > skb->data_end) goto err' at insn #5 which
+means that in the fall-through case the register R3 (which points to skb->data)
+has at least 14 directly accessible bytes. The verifier marks it
+as R3=pkt(id=0,off=0,r=14).
+id=0 means that no additional variables were added to the register.
+off=0 means that no additional constants were added.
+r=14 is the range of safe access which means that bytes [R3, R3 + 14) are ok.
+Note that R5 is marked as R5=pkt(id=0,off=14,r=14). It also points
+to the packet data, but constant 14 was added to the register, so
+it now points to 'skb->data + 14' and accessible range is [R5, R5 + 14 - 14)
+which is zero bytes.
+
+More complex packet access may look like:
+ R0=imm1 R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
+ 6: r0 = *(u8 *)(r3 +7) /* load 7th byte from the packet */
+ 7: r4 = *(u8 *)(r3 +12)
+ 8: r4 *= 14
+ 9: r3 = *(u32 *)(r1 +76) /* load skb->data */
+10: r3 += r4
+11: r2 = r1
+12: r2 <<= 48
+13: r2 >>= 48
+14: r3 += r2
+15: r2 = r3
+16: r2 += 8
+17: r1 = *(u32 *)(r1 +80) /* load skb->data_end */
+18: if r2 > r1 goto pc+2
+ R0=inv56 R1=pkt_end R2=pkt(id=2,off=8,r=8) R3=pkt(id=2,off=0,r=8) R4=inv52 R5=pkt(id=0,off=14,r=14) R10=fp
+19: r1 = *(u8 *)(r3 +4)
+The state of the register R3 is R3=pkt(id=2,off=0,r=8)
+id=2 means that two 'r3 += rX' instructions were seen, so r3 points to some
+offset within a packet and since the program author did
+'if (r3 + 8 > r1) goto err' at insn #18, the safe range is [R3, R3 + 8).
+The verifier only allows 'add' operation on packet registers. Any other
+operation will set the register state to 'unknown_value' and it won't be
+available for direct packet access.
+Operation 'r3 += rX' may overflow and become less than original skb->data,
+therefore the verifier has to prevent that. So it tracks the number of
+upper zero bits in all 'unknown_value' registers, so when it sees
+'r3 += rX' instruction and rX is more than 16-bit value, it will error as:
+"cannot add integer value with N upper zero bits to ptr_to_packet"
+Ex. after insn 'r4 = *(u8 *)(r3 +12)' (insn #7 above) the state of r4 is
+R4=inv56 which means that upper 56 bits on the register are guaranteed
+to be zero. After insn 'r4 *= 14' the state becomes R4=inv52, since
+multiplying 8-bit value by constant 14 will keep upper 52 bits as zero.
+Similarly 'r2 >>= 48' will make R2=inv48, since the shift is not sign
+extending. This logic is implemented in evaluate_reg_alu() function.
+
+The end result is that bpf program author can access packet directly
+using normal C code as:
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct eth_hdr *eth = data;
+ struct iphdr *iph = data + sizeof(*eth);
+ struct udphdr *udp = data + sizeof(*eth) + sizeof(*iph);
+
+ if (data + sizeof(*eth) + sizeof(*iph) + sizeof(*udp) > data_end)
+ return 0;
+ if (eth->h_proto != htons(ETH_P_IP))
+ return 0;
+ if (iph->protocol != IPPROTO_UDP || iph->ihl != 5)
+ return 0;
+ if (udp->dest == 53 || udp->source == 9)
+ ...;
+which makes such programs easier to write compared to LD_ABS insn
+and significantly faster.
+
eBPF maps
---------
'maps' is a generic storage of different types for sharing data between kernel
@@ -1293,5 +1374,5 @@ to give potential BPF hackers or security auditors a better overview of
the underlying architecture.
Jay Schulist <jschlst@samba.org>
-Daniel Borkmann <dborkman@redhat.com>
-Alexei Starovoitov <ast@plumgrid.com>
+Daniel Borkmann <daniel@iogearbox.net>
+Alexei Starovoitov <ast@kernel.org>
diff --git a/Documentation/networking/gen_stats.txt b/Documentation/networking/gen_stats.txt
index 70e6275b757a..ff630a87b511 100644
--- a/Documentation/networking/gen_stats.txt
+++ b/Documentation/networking/gen_stats.txt
@@ -33,7 +33,8 @@ my_dumping_routine(struct sk_buff *skb, ...)
{
struct gnet_dump dump;
- if (gnet_stats_start_copy(skb, TCA_STATS2, &mystruct->lock, &dump) < 0)
+ if (gnet_stats_start_copy(skb, TCA_STATS2, &mystruct->lock, &dump,
+ TCA_PAD) < 0)
goto rtattr_failure;
if (gnet_stats_copy_basic(&dump, &mystruct->bstats) < 0 ||
@@ -56,7 +57,8 @@ existing TLV types.
my_dumping_routine(struct sk_buff *skb, ...)
{
if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
- TCA_XSTATS, &mystruct->lock, &dump) < 0)
+ TCA_XSTATS, &mystruct->lock, &dump,
+ TCA_PAD) < 0)
goto rtattr_failure;
...
}
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index b183e2b606c8..6c7f365b1515 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -63,6 +63,16 @@ fwmark_reflect - BOOLEAN
fwmark of the packet they are replying to.
Default: 0
+fib_multipath_use_neigh - BOOLEAN
+ Use status of existing neighbor entry when determining nexthop for
+ multipath routes. If disabled, neighbor information is not used and
+ packets could be directed to a failed nexthop. Only valid for kernels
+ built with CONFIG_IP_ROUTE_MULTIPATH enabled.
+ Default: 0 (disabled)
+ Possible values:
+ 0 - disabled
+ 1 - enabled
+
route/max_size - INTEGER
Maximum number of routes allowed in the kernel. Increase
this when using large numbers of interfaces and/or routes.
diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt
index cf996394e466..14422f8fcdc4 100644
--- a/Documentation/networking/ipvlan.txt
+++ b/Documentation/networking/ipvlan.txt
@@ -8,7 +8,7 @@ Initial Release:
This is conceptually very similar to the macvlan driver with one major
exception of using L3 for mux-ing /demux-ing among slaves. This property makes
the master device share the L2 with it's slave devices. I have developed this
-driver in conjuntion with network namespaces and not sure if there is use case
+driver in conjunction with network namespaces and not sure if there is use case
outside of it.
@@ -42,7 +42,7 @@ out. In this mode the slaves will RX/TX multicast and broadcast (if applicable)
as well.
4.2 L3 mode:
- In this mode TX processing upto L3 happens on the stack instance attached
+ In this mode TX processing up to L3 happens on the stack instance attached
to the slave device and packets are switched to the stack instance of the
master device for the L2 processing and routing from that instance will be
used before packets are queued on the outbound device. In this mode the slaves
@@ -56,7 +56,7 @@ situations defines your use case then you can choose to use ipvlan -
(a) The Linux host that is connected to the external switch / router has
policy configured that allows only one mac per port.
(b) No of virtual devices created on a master exceed the mac capacity and
-puts the NIC in promiscous mode and degraded performance is a concern.
+puts the NIC in promiscuous mode and degraded performance is a concern.
(c) If the slave device is to be put into the hostile / untrusted network
namespace where L2 on the slave could be changed / misused.
diff --git a/Documentation/networking/mac80211-injection.txt b/Documentation/networking/mac80211-injection.txt
index ec8f934c2eb2..d58d78df9ca2 100644
--- a/Documentation/networking/mac80211-injection.txt
+++ b/Documentation/networking/mac80211-injection.txt
@@ -37,14 +37,27 @@ radiotap headers and used to control injection:
HT rate for the transmission (only for devices without own rate control).
Also some flags are parsed
- IEEE80211_TX_RC_SHORT_GI: use short guard interval
- IEEE80211_TX_RC_40_MHZ_WIDTH: send in HT40 mode
+ IEEE80211_RADIOTAP_MCS_SGI: use short guard interval
+ IEEE80211_RADIOTAP_MCS_BW_40: send in HT40 mode
* IEEE80211_RADIOTAP_DATA_RETRIES
number of retries when either IEEE80211_RADIOTAP_RATE or
IEEE80211_RADIOTAP_MCS was used
+ * IEEE80211_RADIOTAP_VHT
+
+ VHT mcs and number of streams used in the transmission (only for devices
+ without own rate control). Also other fields are parsed
+
+ flags field
+ IEEE80211_RADIOTAP_VHT_FLAG_SGI: use short guard interval
+
+ bandwidth field
+ 1: send using 40MHz channel width
+ 4: send using 80MHz channel width
+ 11: send using 160MHz channel width
+
The injection code can also skip all other currently defined radiotap fields
facilitating replay of captured radiotap headers directly.
diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt
index f310edec8a77..7413eb05223b 100644
--- a/Documentation/networking/netdev-features.txt
+++ b/Documentation/networking/netdev-features.txt
@@ -131,13 +131,11 @@ stack. Driver should not change behaviour based on them.
* LLTX driver (deprecated for hardware drivers)
-NETIF_F_LLTX should be set in drivers that implement their own locking in
-transmit path or don't need locking at all (e.g. software tunnels).
-In ndo_start_xmit, it is recommended to use a try_lock and return
-NETDEV_TX_LOCKED when the spin lock fails. The locking should also properly
-protect against other callbacks (the rules you need to find out).
+NETIF_F_LLTX is meant to be used by drivers that don't need locking at all,
+e.g. software tunnels.
-Don't use it for new drivers.
+This is also used in a few legacy drivers that implement their
+own locking; don't use it for new (hardware) drivers.
* netns-local device
diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
index 0b1cf6b2a592..7fec2061a334 100644
--- a/Documentation/networking/netdevices.txt
+++ b/Documentation/networking/netdevices.txt
@@ -69,10 +69,9 @@ ndo_start_xmit:
When the driver sets NETIF_F_LLTX in dev->features this will be
called without holding netif_tx_lock. In this case the driver
- has to lock by itself when needed. It is recommended to use a try lock
- for this and return NETDEV_TX_LOCKED when the spin lock fails.
- The locking there should also properly protect against
- set_rx_mode. Note that the use of NETIF_F_LLTX is deprecated.
+ has to lock by itself when needed.
+ The locking there should also properly protect against
+ set_rx_mode. WARNING: use of NETIF_F_LLTX is deprecated.
Don't use it for new drivers.
Context: Process with BHs disabled or BH (timer),
@@ -83,8 +82,6 @@ ndo_start_xmit:
o NETDEV_TX_BUSY Cannot transmit packet, try later
Usually a bug, means queue start/stop flow control is broken in
the driver. Note: the driver must NOT put the skb in its DMA ring.
- o NETDEV_TX_LOCKED Locking failed, please retry quickly.
- Only valid when NETIF_F_LLTX is set.
ndo_tx_timeout:
Synchronization: netif_tx_lock spinlock; all TX queues frozen.
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index f4be85e96005..2c4e3354e128 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -67,12 +67,12 @@ The two basic thread commands are:
* add_device DEVICE@NAME -- adds a single device
* rem_device_all -- remove all associated devices
-When adding a device to a thread, a corrosponding procfile is created
+When adding a device to a thread, a corresponding procfile is created
which is used for configuring this device. Thus, device names need to
be unique.
To support adding the same device to multiple threads, which is useful
-with multi queue NICs, a the device naming scheme is extended with "@":
+with multi queue NICs, the device naming scheme is extended with "@":
device@something
The part after "@" can be anything, but it is custom to use the thread
@@ -221,7 +221,7 @@ Sample scripts
A collection of tutorial scripts and helpers for pktgen is in the
samples/pktgen directory. The helper parameters.sh file support easy
-and consistant parameter parsing across the sample scripts.
+and consistent parameter parsing across the sample scripts.
Usage example and help:
./pktgen_sample01_simple.sh -i eth4 -m 00:1B:21:3C:9D:F8 -d 192.168.8.2
diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt
new file mode 100644
index 000000000000..f200467ade38
--- /dev/null
+++ b/Documentation/networking/segmentation-offloads.txt
@@ -0,0 +1,130 @@
+Segmentation Offloads in the Linux Networking Stack
+
+Introduction
+============
+
+This document describes a set of techniques in the Linux networking stack
+to take advantage of segmentation offload capabilities of various NICs.
+
+The following technologies are described:
+ * TCP Segmentation Offload - TSO
+ * UDP Fragmentation Offload - UFO
+ * IPIP, SIT, GRE, and UDP Tunnel Offloads
+ * Generic Segmentation Offload - GSO
+ * Generic Receive Offload - GRO
+ * Partial Generic Segmentation Offload - GSO_PARTIAL
+
+TCP Segmentation Offload
+========================
+
+TCP segmentation allows a device to segment a single frame into multiple
+frames with a data payload size specified in skb_shinfo()->gso_size.
+When TCP segmentation is requested, the bit for either SKB_GSO_TCP or
+SKB_GSO_TCP6 should be set in skb_shinfo()->gso_type and
+skb_shinfo()->gso_size should be set to a non-zero value.
+
+TCP segmentation is dependent on support for the use of partial checksum
+offload. For this reason TSO is normally disabled if the Tx checksum
+offload for a given device is disabled.
+
+In order to support TCP segmentation offload it is necessary to populate
+the network and transport header offsets of the skbuff so that the device
+drivers will be able to determine the offsets of the IP or IPv6 header and the
+TCP header. In addition as CHECKSUM_PARTIAL is required csum_start should
+also point to the TCP header of the packet.
+
+For IPv4 segmentation we support one of two types in terms of the IP ID.
+The default behavior is to increment the IP ID with every segment. If the
+GSO type SKB_GSO_TCP_FIXEDID is specified then we will not increment the IP
+ID and all segments will use the same IP ID. If a device has
+NETIF_F_TSO_MANGLEID set then the IP ID can be ignored when performing TSO
+and we will either increment the IP ID for all frames, or leave it at a
+static value based on driver preference.
+
+UDP Fragmentation Offload
+=========================
+
+UDP fragmentation offload allows a device to fragment an oversized UDP
+datagram into multiple IPv4 fragments. Many of the requirements for UDP
+fragmentation offload are the same as TSO. However the IPv4 ID for
+fragments should not increment as a single IPv4 datagram is fragmented.
+
+IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads
+========================================================
+
+In addition to the offloads described above it is possible for a frame to
+contain additional headers such as an outer tunnel. In order to account
+for such instances an additional set of segmentation offload types was
+introduced including SKB_GSO_IPIP, SKB_GSO_SIT, SKB_GSO_GRE, and
+SKB_GSO_UDP_TUNNEL. These extra segmentation types are used to identify
+cases where there are more than just 1 set of headers. For example in the
+case of IPIP and SIT we should have the network and transport headers moved
+from the standard list of headers to "inner" header offsets.
+
+Currently only two levels of headers are supported. The convention is to
+refer to the tunnel headers as the outer headers, while the encapsulated
+data is normally referred to as the inner headers. Below is the list of
+calls to access the given headers:
+
+IPIP/SIT Tunnel:
+ Outer Inner
+MAC skb_mac_header
+Network skb_network_header skb_inner_network_header
+Transport skb_transport_header
+
+UDP/GRE Tunnel:
+ Outer Inner
+MAC skb_mac_header skb_inner_mac_header
+Network skb_network_header skb_inner_network_header
+Transport skb_transport_header skb_inner_transport_header
+
+In addition to the above tunnel types there are also SKB_GSO_GRE_CSUM and
+SKB_GSO_UDP_TUNNEL_CSUM. These two additional tunnel types reflect the
+fact that the outer header also requests to have a non-zero checksum
+included in the outer header.
+
+Finally there is SKB_GSO_REMCSUM which indicates that a given tunnel header
+has requested a remote checksum offload. In this case the inner headers
+will be left with a partial checksum and only the outer header checksum
+will be computed.
+
+Generic Segmentation Offload
+============================
+
+Generic segmentation offload is a pure software offload that is meant to
+deal with cases where device drivers cannot perform the offloads described
+above. What occurs in GSO is that a given skbuff will have its data broken
+out over multiple skbuffs that have been resized to match the MSS provided
+via skb_shinfo()->gso_size.
+
+Before enabling any hardware segmentation offload a corresponding software
+offload is required in GSO. Otherwise it becomes possible for a frame to
+be re-routed between devices and end up being unable to be transmitted.
+
+Generic Receive Offload
+=======================
+
+Generic receive offload is the complement to GSO. Ideally any frame
+assembled by GRO should be segmented to create an identical sequence of
+frames using GSO, and any sequence of frames segmented by GSO should be
+able to be reassembled back to the original by GRO. The only exception to
+this is IPv4 ID in the case that the DF bit is set for a given IP header.
+If the value of the IPv4 ID is not sequentially incrementing it will be
+altered so that it is when a frame assembled via GRO is segmented via GSO.
+
+Partial Generic Segmentation Offload
+====================================
+
+Partial generic segmentation offload is a hybrid between TSO and GSO. What
+it effectively does is take advantage of certain traits of TCP and tunnels
+so that instead of having to rewrite the packet headers for each segment
+only the inner-most transport header and possibly the outer-most network
+header need to be updated. This allows devices that do not support tunnel
+offloads or tunnel offloads with checksum to still make use of segmentation.
+
+With the partial offload what occurs is that all headers excluding the
+inner transport header are updated such that they will contain the correct
+values for when the header is simply duplicated. The one exception to this
+is the outer IPv4 ID field. It is up to the device drivers to guarantee
+that the IPv4 ID field is incremented in the case that a given header does
+not have the DF bit set.
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index d64a14714236..671fe3dd56d3 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -1,6 +1,6 @@
STMicroelectronics 10/100/1000 Synopsys Ethernet driver
-Copyright (C) 2007-2014 STMicroelectronics Ltd
+Copyright (C) 2007-2015 STMicroelectronics Ltd
Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers
@@ -138,6 +138,8 @@ struct plat_stmmacenet_data {
int (*init)(struct platform_device *pdev, void *priv);
void (*exit)(struct platform_device *pdev, void *priv);
void *bsp_priv;
+ int has_gmac4;
+ bool tso_en;
};
Where:
@@ -181,6 +183,8 @@ Where:
registers. init/exit callbacks should not use or modify
platform data.
o bsp_priv: another private pointer.
+ o has_gmac4: uses GMAC4 core.
+ o tso_en: Enables TSO (TCP Segmentation Offload) feature.
For MDIO bus The we have:
@@ -278,6 +282,13 @@ Please see the following document:
o stmmac_ethtool.c: to implement the ethtool support;
o stmmac.h: private driver structure;
o common.h: common definitions and VFTs;
+ o mmc_core.c/mmc.h: Management MAC Counters;
+ o stmmac_hwtstamp.c: HW timestamp support for PTP;
+ o stmmac_ptp.c: PTP 1588 clock;
+ o dwmac-<XXX>.c: these are for the platform glue-logic file; e.g. dwmac-sti.c
+ for STMicroelectronics SoCs.
+
+- GMAC 3.x
o descs.h: descriptor structure definitions;
o dwmac1000_core.c: dwmac GiGa core functions;
o dwmac1000_dma.c: dma functions for the GMAC chip;
@@ -289,11 +300,32 @@ Please see the following document:
o enh_desc.c: functions for handling enhanced descriptors;
o norm_desc.c: functions for handling normal descriptors;
o chain_mode.c/ring_mode.c:: functions to manage RING/CHAINED modes;
- o mmc_core.c/mmc.h: Management MAC Counters;
- o stmmac_hwtstamp.c: HW timestamp support for PTP;
- o stmmac_ptp.c: PTP 1588 clock;
- o dwmac-<XXX>.c: these are for the platform glue-logic file; e.g. dwmac-sti.c
- for STMicroelectronics SoCs.
+
+- GMAC4.x generation
+ o dwmac4_core.c: dwmac GMAC4.x core functions;
+ o dwmac4_desc.c: functions for handling GMAC4.x descriptors;
+ o dwmac4_descs.h: descriptor definitions;
+ o dwmac4_dma.c: dma functions for the GMAC4.x chip;
+ o dwmac4_dma.h: dma definitions for the GMAC4.x chip;
+ o dwmac4.h: core definitions for the GMAC4.x chip;
+ o dwmac4_lib.c: generic GMAC4.x functions;
+
+4.12) TSO support (GMAC4.x)
+
+TSO (TCP Segmentation Offload) feature is supported by GMAC 4.x chip family.
+When a packet is sent through TCP protocol, the TCP stack ensures that
+the SKB provided to the low level driver (stmmac in our case) matches with
+the maximum frame len (IP header + TCP header + payload <= 1500 bytes (for
+MTU set to 1500)). It means that if an application using TCP wants to send a
+packet which will have a length (after adding headers) > 1514 the packet
+will be split in several TCP packets: The data payload is split and headers
+(TCP/IP ..) are added. It is done by software.
+
+When TSO is enabled, the TCP stack doesn't care about the maximum frame
+length and provides the SKB packet to stmmac as it is. The GMAC IP will have
+to perform the segmentation by itself to match the maximum frame length.
+
+This feature can be enabled in device tree through "snps,tso" entry.
5) Debug Information
diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index fad63136ee3e..31c39115834d 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -89,6 +89,18 @@ Typically, the management port is not participating in offloaded data plane and
is loaded with a different driver, such as a NIC driver, on the management port
device.
+Switch ID
+^^^^^^^^^
+
+The switchdev driver must implement the switchdev op switchdev_port_attr_get
+for SWITCHDEV_ATTR_ID_PORT_PARENT_ID for each port netdev, returning the same
+physical ID for each port of a switch. The ID must be unique between switches
+on the same system. The ID does not need to be unique between switches on
+different systems.
+
+The switch ID is used to locate ports on a switch and to know if aggregated
+ports belong to the same switch.
+
Port Netdev Naming
^^^^^^^^^^^^^^^^^^
@@ -104,25 +116,13 @@ external configuration. For example, if a physical 40G port is split logically
into 4 10G ports, resulting in 4 port netdevs, the device can give a unique
name for each port using port PHYS name. The udev rule would be:
-SUBSYSTEM=="net", ACTION=="add", DRIVER="<driver>", ATTR{phys_port_name}!="", \
- NAME="$attr{phys_port_name}"
+SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}=="<phys_switch_id>", \
+ ATTR{phys_port_name}!="", NAME="swX$attr{phys_port_name}"
Suggested naming convention is "swXpYsZ", where X is the switch name or ID, Y
is the port name or ID, and Z is the sub-port name or ID. For example, sw1p1s0
would be sub-port 0 on port 1 on switch 1.
-Switch ID
-^^^^^^^^^
-
-The switchdev driver must implement the switchdev op switchdev_port_attr_get
-for SWITCHDEV_ATTR_ID_PORT_PARENT_ID for each port netdev, returning the same
-physical ID for each port of a switch. The ID must be unique between switches
-on the same system. The ID does not need to be unique between switches on
-different systems.
-
-The switch ID is used to locate ports on a switch and to know if aggregated
-ports belong to the same switch.
-
Port Features
^^^^^^^^^^^^^
@@ -386,7 +386,7 @@ used. First phase is to "prepare" anything needed, including various checks,
memory allocation, etc. The goal is to handle the stuff that is not unlikely
to fail here. The second phase is to "commit" the actual changes.
-Switchdev provides an inftrastructure for sharing items (for example memory
+Switchdev provides an infrastructure for sharing items (for example memory
allocations) between the two phases.
The object is created by a driver in the "prepare" phase and is queued up by:
diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index a977339fbe0a..671cccf0dcd2 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -44,11 +44,17 @@ timeval of SO_TIMESTAMP (ms).
Supports multiple types of timestamp requests. As a result, this
socket option takes a bitmap of flags, not a boolean. In
- err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (void *) val, &val);
+ err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val,
+ sizeof(val));
val is an integer with any of the following bits set. Setting any other
bit returns EINVAL and does not change the current state.
+The socket option configures timestamp generation for individual
+sk_buffs (1.3.1), timestamp reporting to the socket's error
+queue (1.3.2) and options (1.3.3). Timestamp generation can also
+be enabled for individual sendmsg calls using cmsg (1.3.4).
+
1.3.1 Timestamp Generation
@@ -71,13 +77,16 @@ SOF_TIMESTAMPING_RX_SOFTWARE:
kernel receive stack.
SOF_TIMESTAMPING_TX_HARDWARE:
- Request tx timestamps generated by the network adapter.
+ Request tx timestamps generated by the network adapter. This flag
+ can be enabled via both socket options and control messages.
SOF_TIMESTAMPING_TX_SOFTWARE:
Request tx timestamps when data leaves the kernel. These timestamps
are generated in the device driver as close as possible, but always
prior to, passing the packet to the network interface. Hence, they
require driver support and may not be available for all devices.
+ This flag can be enabled via both socket options and control messages.
+
SOF_TIMESTAMPING_TX_SCHED:
Request tx timestamps prior to entering the packet scheduler. Kernel
@@ -90,7 +99,8 @@ SOF_TIMESTAMPING_TX_SCHED:
machines with virtual devices where a transmitted packet travels
through multiple devices and, hence, multiple packet schedulers,
a timestamp is generated at each layer. This allows for fine
- grained measurement of queuing delay.
+ grained measurement of queuing delay. This flag can be enabled
+ via both socket options and control messages.
SOF_TIMESTAMPING_TX_ACK:
Request tx timestamps when all data in the send buffer has been
@@ -99,6 +109,7 @@ SOF_TIMESTAMPING_TX_ACK:
over-report measurement, because the timestamp is generated when all
data up to and including the buffer at send() was acknowledged: the
cumulative acknowledgment. The mechanism ignores SACK and FACK.
+ This flag can be enabled via both socket options and control messages.
1.3.2 Timestamp Reporting
@@ -183,6 +194,37 @@ having access to the contents of the original packet, so cannot be
combined with SOF_TIMESTAMPING_OPT_TSONLY.
+1.3.4. Enabling timestamps via control messages
+
+In addition to socket options, timestamp generation can be requested
+per write via cmsg, only for SOF_TIMESTAMPING_TX_* (see Section 1.3.1).
+Using this feature, applications can sample timestamps per sendmsg()
+without paying the overhead of enabling and disabling timestamps via
+setsockopt:
+
+ struct msghdr *msg;
+ ...
+ cmsg = CMSG_FIRSTHDR(msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SO_TIMESTAMPING;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+ *((__u32 *) CMSG_DATA(cmsg)) = SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK;
+ err = sendmsg(fd, msg, 0);
+
+The SOF_TIMESTAMPING_TX_* flags set via cmsg will override
+the SOF_TIMESTAMPING_TX_* flags set via setsockopt.
+
+Moreover, applications must still enable timestamp reporting via
+setsockopt to receive timestamps:
+
+ __u32 val = SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID /* or any other flag */;
+ err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val,
+ sizeof(val));
+
+
1.4 Bytestream Timestamps
The SO_TIMESTAMPING interface supports timestamping of bytes in a
diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt
index d52aa10cfe91..5da679c573d2 100644
--- a/Documentation/networking/vrf.txt
+++ b/Documentation/networking/vrf.txt
@@ -41,7 +41,7 @@ using an rx_handler which gives the impression that packets flow through
the VRF device. Similarly on egress routing rules are used to send packets
to the VRF device driver before getting sent out the actual interface. This
allows tcpdump on a VRF device to capture all packets into and out of the
-VRF as a whole.[1] Similiarly, netfilter [2] and tc rules can be applied
+VRF as a whole.[1] Similarly, netfilter [2] and tc rules can be applied
using the VRF device to specify rules that apply to the VRF domain as a whole.
[1] Packets in the forwarded state do not flow through the device, so those
diff --git a/Documentation/networking/xfrm_sync.txt b/Documentation/networking/xfrm_sync.txt
index d7aac9dedeb4..8d88e0f2ec49 100644
--- a/Documentation/networking/xfrm_sync.txt
+++ b/Documentation/networking/xfrm_sync.txt
@@ -4,7 +4,7 @@ Krisztian <hidden@balabit.hu> and others and additional patches
from Jamal <hadi@cyberus.ca>.
The end goal for syncing is to be able to insert attributes + generate
-events so that the an SA can be safely moved from one machine to another
+events so that the SA can be safely moved from one machine to another
for HA purposes.
The idea is to synchronize the SA so that the takeover machine can do
the processing of the SA as accurate as possible if it has access to it.
@@ -13,7 +13,7 @@ We already have the ability to generate SA add/del/upd events.
These patches add ability to sync and have accurate lifetime byte (to
ensure proper decay of SAs) and replay counters to avoid replay attacks
with as minimal loss at failover time.
-This way a backup stays as closely uptodate as an active member.
+This way a backup stays as closely up-to-date as an active member.
Because the above items change for every packet the SA receives,
it is possible for a lot of the events to be generated.
@@ -163,7 +163,7 @@ If you have an SA that is getting hit by traffic in bursts such that
there is a period where the timer threshold expires with no packets
seen, then an odd behavior is seen as follows:
The first packet arrival after a timer expiry will trigger a timeout
-aevent; i.e we dont wait for a timeout period or a packet threshold
+event; i.e. we don't wait for a timeout period or a packet threshold
to be reached. This is done for simplicity and efficiency reasons.
-JHS
diff --git a/Documentation/phy.txt b/Documentation/phy.txt
index b388c5af9e72..0aa994bd9a91 100644
--- a/Documentation/phy.txt
+++ b/Documentation/phy.txt
@@ -31,16 +31,28 @@ should provide its own implementation of of_xlate. of_xlate is used only for
dt boot case.
#define of_phy_provider_register(dev, xlate) \
- __of_phy_provider_register((dev), THIS_MODULE, (xlate))
+ __of_phy_provider_register((dev), NULL, THIS_MODULE, (xlate))
#define devm_of_phy_provider_register(dev, xlate) \
- __devm_of_phy_provider_register((dev), THIS_MODULE, (xlate))
+ __devm_of_phy_provider_register((dev), NULL, THIS_MODULE, (xlate))
of_phy_provider_register and devm_of_phy_provider_register macros can be used to
register the phy_provider and it takes device and of_xlate as
arguments. For the dt boot case, all PHY providers should use one of the above
2 macros to register the PHY provider.
+Often the device tree nodes associated with a PHY provider will contain a set
+of children that each represent a single PHY. Some bindings may nest the child
+nodes within extra levels for context and extensibility, in which case the low
+level of_phy_provider_register_full() and devm_of_phy_provider_register_full()
+macros can be used to override the node containing the children.
+
+#define of_phy_provider_register_full(dev, children, xlate) \
+ __of_phy_provider_register(dev, children, THIS_MODULE, xlate)
+
+#define devm_of_phy_provider_register_full(dev, children, xlate) \
+ __devm_of_phy_provider_register(dev, children, THIS_MODULE, xlate)
+
void devm_of_phy_provider_unregister(struct device *dev,
struct phy_provider *phy_provider);
void of_phy_provider_unregister(struct phy_provider *phy_provider);
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 7328cf85236c..1fd1fbe9ce95 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -586,6 +586,10 @@ drivers to make their ->remove() callbacks avoid races with runtime PM directly,
but also it allows of more flexibility in the handling of devices during the
removal of their drivers.
+Drivers in ->remove() callback should undo the runtime PM changes done
+in ->probe(). Usually this means calling pm_runtime_disable(),
+pm_runtime_dont_use_autosuspend() etc.
+
The user space can effectively disallow the driver of the device to power manage
it at run time by changing the value of its /sys/devices/.../power/control
attribute to "on", which causes pm_runtime_forbid() to be called. In principle,
diff --git a/Documentation/rpmsg.txt b/Documentation/rpmsg.txt
index f7edc3aa1e92..a95e36a43288 100644
--- a/Documentation/rpmsg.txt
+++ b/Documentation/rpmsg.txt
@@ -249,24 +249,12 @@ MODULE_DEVICE_TABLE(rpmsg, rpmsg_driver_sample_id_table);
static struct rpmsg_driver rpmsg_sample_client = {
.drv.name = KBUILD_MODNAME,
- .drv.owner = THIS_MODULE,
.id_table = rpmsg_driver_sample_id_table,
.probe = rpmsg_sample_probe,
.callback = rpmsg_sample_cb,
.remove = rpmsg_sample_remove,
};
-
-static int __init init(void)
-{
- return register_rpmsg_driver(&rpmsg_sample_client);
-}
-module_init(init);
-
-static void __exit fini(void)
-{
- unregister_rpmsg_driver(&rpmsg_sample_client);
-}
-module_exit(fini);
+module_rpmsg_driver(rpmsg_sample_client);
Note: a similar sample which can be built and loaded can be found
in samples/rpmsg/.
diff --git a/Documentation/scsi/g_NCR5380.txt b/Documentation/scsi/g_NCR5380.txt
index 3b80f567f818..fd880150aeea 100644
--- a/Documentation/scsi/g_NCR5380.txt
+++ b/Documentation/scsi/g_NCR5380.txt
@@ -23,11 +23,10 @@ supported by the driver.
If the default configuration does not work for you, you can use the kernel
command lines (eg using the lilo append command):
- ncr5380=port,irq,dma
- ncr53c400=port,irq
-or
- ncr5380=base,irq,dma
- ncr53c400=base,irq
+ ncr5380=addr,irq
+ ncr53c400=addr,irq
+ ncr53c400a=addr,irq
+ dtc3181e=addr,irq
The driver does not probe for any addresses or ports other than those in
the OVERRIDE or given to the kernel as above.
@@ -36,19 +35,17 @@ This driver provides some information on what it has detected in
/proc/scsi/g_NCR5380/x where x is the scsi card number as detected at boot
time. More info to come in the future.
-When NCR53c400 support is compiled in, BIOS parameters will be returned by
-the driver (the raw 5380 driver does not and I don't plan to fiddle with
-it!).
-
This driver works as a module.
When included as a module, parameters can be passed on the insmod/modprobe
command line:
ncr_irq=xx the interrupt
ncr_addr=xx the port or base address (for port or memory
mapped, resp.)
- ncr_dma=xx the DMA
ncr_5380=1 to set up for a NCR5380 board
ncr_53c400=1 to set up for a NCR53C400 board
+ ncr_53c400a=1 to set up for a NCR53C400A board
+ dtc_3181e=1 to set up for a Domex Technology Corp 3181E board
+ hp_c2502=1 to set up for a Hewlett Packard C2502 board
e.g.
modprobe g_NCR5380 ncr_irq=5 ncr_addr=0x350 ncr_5380=1
for a port mapped NCR5380 board or
diff --git a/Documentation/scsi/scsi-parameters.txt b/Documentation/scsi/scsi-parameters.txt
index 2bfd6f6d2d3d..1241ac11edb1 100644
--- a/Documentation/scsi/scsi-parameters.txt
+++ b/Documentation/scsi/scsi-parameters.txt
@@ -27,13 +27,15 @@ parameters may be changed at runtime by the command
aic79xx= [HW,SCSI]
See Documentation/scsi/aic79xx.txt.
- atascsi= [HW,SCSI] Atari SCSI
+ atascsi= [HW,SCSI]
+ See drivers/scsi/atari_scsi.c.
BusLogic= [HW,SCSI]
See drivers/scsi/BusLogic.c, comment before function
BusLogic_ParseDriverOptions().
dtc3181e= [HW,SCSI]
+ See Documentation/scsi/g_NCR5380.txt.
eata= [HW,SCSI]
@@ -51,8 +53,8 @@ parameters may be changed at runtime by the command
ips= [HW,SCSI] Adaptec / IBM ServeRAID controller
See header of drivers/scsi/ips.c.
- mac5380= [HW,SCSI] Format:
- <can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
+ mac5380= [HW,SCSI]
+ See drivers/scsi/mac_scsi.c.
max_luns= [SCSI] Maximum number of LUNs to probe.
Should be between 1 and 2^32-1.
@@ -65,10 +67,13 @@ parameters may be changed at runtime by the command
See header of drivers/scsi/NCR_D700.c.
ncr5380= [HW,SCSI]
+ See Documentation/scsi/g_NCR5380.txt.
ncr53c400= [HW,SCSI]
+ See Documentation/scsi/g_NCR5380.txt.
ncr53c400a= [HW,SCSI]
+ See Documentation/scsi/g_NCR5380.txt.
ncr53c406a= [HW,SCSI]
diff --git a/Documentation/security/LoadPin.txt b/Documentation/security/LoadPin.txt
new file mode 100644
index 000000000000..e11877f5d3d4
--- /dev/null
+++ b/Documentation/security/LoadPin.txt
@@ -0,0 +1,17 @@
+LoadPin is a Linux Security Module that ensures that all kernel-loaded files
+(modules, firmware, etc) originate from the same filesystem, with
+the expectation that such a filesystem is backed by a read-only device
+such as dm-verity or CDROM. This allows systems that have a verified
+and/or unchangeable filesystem to enforce module and firmware loading
+restrictions without needing to sign the files individually.
+
+The LSM is selectable at build-time with CONFIG_SECURITY_LOADPIN, and
+can be controlled at boot-time with the kernel command line option
+"loadpin.enabled". By default, it is enabled, but can be disabled at
+boot ("loadpin.enabled=0").
+
+LoadPin starts pinning when it sees the first file loaded. If the
+block device backing the filesystem is not read-only, a sysctl is
+created to toggle pinning: /proc/sys/kernel/loadpin/enabled. (Having
+a mutable filesystem means pinning is mutable too, but having the
+sysctl allows for easy testing on systems with a mutable filesystem.)
diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
index 8c183873b2b7..20d05719bceb 100644
--- a/Documentation/security/keys.txt
+++ b/Documentation/security/keys.txt
@@ -823,6 +823,36 @@ The keyctl syscall functions are:
A process must have search permission on the key for this function to be
successful.
+ (*) Compute a Diffie-Hellman shared secret or public key
+
+ long keyctl(KEYCTL_DH_COMPUTE, struct keyctl_dh_params *params,
+ char *buffer, size_t buflen);
+
+ The params struct contains serial numbers for three keys:
+
+ - The prime, p, known to both parties
+ - The local private key
+ - The base integer, which is either a shared generator or the
+ remote public key
+
+ The value computed is:
+
+ result = base ^ private (mod prime)
+
+ If the base is the shared generator, the result is the local
+ public key. If the base is the remote public key, the result is
+ the shared secret.
+
+ The buffer length must be at least the length of the prime, or zero.
+
+ If the buffer length is nonzero, the length of the result is
+ returned when it is successfully calculated and copied into the
+ buffer. When the buffer length is zero, the minimum required
+ buffer length is returned.
+
+ This function will return error EOPNOTSUPP if the key type is not
+ supported, error ENOKEY if the key could not be found, or error
+ EACCES if the key is not readable by the caller.
===============
KERNEL SERVICES
@@ -999,6 +1029,10 @@ payload contents" for more information.
struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
const struct cred *cred,
key_perm_t perm,
+ int (*restrict_link)(struct key *,
+ const struct key_type *,
+ unsigned long,
+ const union key_payload *),
unsigned long flags,
struct key *dest);
@@ -1010,6 +1044,24 @@ payload contents" for more information.
KEY_ALLOC_NOT_IN_QUOTA in flags if the keyring shouldn't be accounted
towards the user's quota). Error ENOMEM can also be returned.
+ If restrict_link is not NULL, it should point to a function that will be
+ called each time an attempt is made to link a key into the new keyring.
+ This function is called to check whether a key may be added into the keyring
+ or not. Callers of key_create_or_update() within the kernel can pass
+ KEY_ALLOC_BYPASS_RESTRICTION to suppress the check. An example of using
+ this is to manage rings of cryptographic keys that are set up when the
+ kernel boots where userspace is also permitted to add keys - provided they
+ can be verified by a key the kernel already has.
+
+ When called, the restriction function will be passed the keyring being
+ added to, the key flags value and the type and payload of the key being
+ added. Note that when a new key is being created, this is called between
+ payload preparsing and actual key creation. The function should return 0
+ to allow the link or an error to reject it.
+
+ A convenience function, restrict_link_reject, exists to always return
+ -EPERM in this case.
+
(*) To check the validity of a key, this function can be called:
diff --git a/Documentation/sound/alsa/HD-Audio.txt b/Documentation/sound/alsa/HD-Audio.txt
index e7193aac669c..d4510ebf2e8c 100644
--- a/Documentation/sound/alsa/HD-Audio.txt
+++ b/Documentation/sound/alsa/HD-Audio.txt
@@ -655,17 +655,6 @@ development branches in general while the development for the current
and next kernels are found in for-linus and for-next branches,
respectively.
-If you are using the latest Linus tree, it'd be better to pull the
-above GIT tree onto it. If you are using the older kernels, an easy
-way to try the latest ALSA code is to build from the snapshot
-tarball. There are daily tarballs and the latest snapshot tarball.
-All can be built just like normal alsa-driver release packages, that
-is, installed via the usual spells: configure, make and make
-install(-modules). See INSTALL in the package. The snapshot tarballs
-are found at:
-
-- ftp://ftp.suse.com/pub/people/tiwai/snapshot/
-
Sending a Bug Report
~~~~~~~~~~~~~~~~~~~~
@@ -699,7 +688,12 @@ problems.
alsa-info
~~~~~~~~~
The script `alsa-info.sh` is a very useful tool to gather the audio
-device information. You can fetch the latest version from:
+device information. It's included in the alsa-utils package. The latest
+version can be found in the git repository:
+
+- git://git.alsa-project.org/alsa-utils.git
+
+The script can be fetched directly from the following URL, too:
- http://www.alsa-project.org/alsa-info.sh
@@ -836,15 +830,11 @@ can get a proc-file dump at the current state, get a list of control
(mixer) elements, set/get the control element value, simulate the PCM
operation, the jack plugging simulation, etc.
-The package is found in:
-
-- ftp://ftp.suse.com/pub/people/tiwai/misc/
-
-A git repository is available:
+The program is found in the git repository below:
- git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/hda-emu.git
-See README file in the tarball for more details about hda-emu
+See README file in the repository for more details about hda-emu
program.
diff --git a/Documentation/sound/alsa/compress_offload.txt b/Documentation/sound/alsa/compress_offload.txt
index 630c492c3dc2..8ba556a131c3 100644
--- a/Documentation/sound/alsa/compress_offload.txt
+++ b/Documentation/sound/alsa/compress_offload.txt
@@ -149,7 +149,7 @@ Gapless Playback
================
When playing thru an album, the decoders have the ability to skip the encoder
delay and padding and directly move from one track content to another. The end
-user can perceive this as gapless playback as we dont have silence while
+user can perceive this as gapless playback as we don't have silence while
switching from one track to another
Also, there might be low-intensity noises due to encoding. Perfect gapless is
@@ -184,7 +184,7 @@ Sequence flow for gapless would be:
- Fill data of the first track
- Trigger start
- User-space finished sending all,
-- Indicaite next track data by sending set_next_track
+- Indicate next track data by sending set_next_track
- Set metadata of the next track
- then call partial_drain to flush most of buffer in DSP
- Fill data of the next track
diff --git a/Documentation/sound/alsa/soc/dapm.txt b/Documentation/sound/alsa/soc/dapm.txt
index 6faab4880006..c45bd79f291e 100644
--- a/Documentation/sound/alsa/soc/dapm.txt
+++ b/Documentation/sound/alsa/soc/dapm.txt
@@ -132,7 +132,7 @@ SOC_DAPM_SINGLE("HiFi Playback Switch", WM8731_APANA, 4, 1, 0),
SND_SOC_DAPM_MIXER("Output Mixer", WM8731_PWR, 4, 1, wm8731_output_mixer_controls,
ARRAY_SIZE(wm8731_output_mixer_controls)),
-If you dont want the mixer elements prefixed with the name of the mixer widget,
+If you don't want the mixer elements prefixed with the name of the mixer widget,
you can use SND_SOC_DAPM_MIXER_NAMED_CTL instead. The parameters are the same
as for SND_SOC_DAPM_MIXER.
diff --git a/Documentation/sound/alsa/soc/overview.txt b/Documentation/sound/alsa/soc/overview.txt
index ff88f52eec98..f3f28b7ae242 100644
--- a/Documentation/sound/alsa/soc/overview.txt
+++ b/Documentation/sound/alsa/soc/overview.txt
@@ -63,7 +63,7 @@ multiple re-usable component drivers :-
and any audio DSP drivers for that platform.
* Machine class driver: The machine driver class acts as the glue that
- decribes and binds the other component drivers together to form an ALSA
+ describes and binds the other component drivers together to form an ALSA
"sound card device". It handles any machine specific controls and
machine level audio events (e.g. turning on an amp at start of playback).
diff --git a/Documentation/sound/alsa/timestamping.txt b/Documentation/sound/alsa/timestamping.txt
index 0b191a23f534..1b6473f393a8 100644
--- a/Documentation/sound/alsa/timestamping.txt
+++ b/Documentation/sound/alsa/timestamping.txt
@@ -129,7 +129,7 @@ will be required to issue multiple queries and perform an
interpolation of the results
In some hardware-specific configuration, the system timestamp is
-latched by a low-level audio subsytem, and the information provided
+latched by a low-level audio subsystem, and the information provided
back to the driver. Due to potential delays in the communication with
the hardware, there is a risk of misalignment with the avail and delay
information. To make sure applications are not confused, a
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 57653a44b128..daabdd7ee543 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -60,6 +60,7 @@ show up in /proc/sys/kernel:
- panic_on_warn
- perf_cpu_time_max_percent
- perf_event_paranoid
+- perf_event_max_stack
- pid_max
- powersave-nap [ PPC only ]
- printk
@@ -645,7 +646,7 @@ allowed to execute.
perf_event_paranoid:
Controls use of the performance events system by unprivileged
-users (without CAP_SYS_ADMIN). The default value is 1.
+users (without CAP_SYS_ADMIN). The default value is 2.
-1: Allow use of (almost) all events by all users
>=0: Disallow raw tracepoint access by users without CAP_IOC_LOCK
@@ -654,6 +655,19 @@ users (without CAP_SYS_ADMIN). The default value is 1.
==============================================================
+perf_event_max_stack:
+
+Controls maximum number of stack frames to copy for (attr.sample_type &
+PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
+'perf record -g' or 'perf trace --call-graph fp'.
+
+The value can only be changed when no events are in use that have callchains
+enabled, otherwise writing to this file will return -EBUSY.
+
+The default value is 127.
+
+==============================================================
+
pid_max:
PID allocation wrap value. When the kernel's next PID value
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 809ab6efcc74..f0480f7ea740 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -43,6 +43,17 @@ Values :
1 - enable the JIT
2 - enable the JIT and ask the compiler to emit traces on kernel log.
+bpf_jit_harden
+--------------
+
+This enables hardening for the Berkeley Packet Filter Just in Time compiler.
+Supported are eBPF JIT backends. Enabling hardening trades off performance,
+but can mitigate JIT spraying.
+Values :
+ 0 - disable JIT hardening (default value)
+ 1 - enable JIT hardening for unprivileged users only
+ 2 - enable JIT hardening for all users
+
dev_weight
--------------
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index cb0368459da3..34a5fece3121 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -581,15 +581,16 @@ Specify "[Nn]ode" for node order
"Zone Order" orders the zonelists by zone type, then by node within each
zone. Specify "[Zz]one" for zone order.
-Specify "[Dd]efault" to request automatic configuration. Autoconfiguration
-will select "node" order in following case.
-(1) if the DMA zone does not exist or
-(2) if the DMA zone comprises greater than 50% of the available memory or
-(3) if any node's DMA zone comprises greater than 70% of its local memory and
- the amount of local memory is big enough.
-
-Otherwise, "zone" order will be selected. Default order is recommended unless
-this is causing problems for your system/application.
+Specify "[Dd]efault" to request automatic configuration.
+
+On 32-bit, the Normal zone needs to be preserved for allocations accessible
+by the kernel, so "zone" order will be selected.
+
+On 64-bit, devices that require DMA32/DMA are relatively rare, so "node"
+order will be selected.
+
+Default order is recommended unless this is causing problems for your
+system/application.
==============================================================
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
index c010be8c85d7..08d74d75150d 100644
--- a/Documentation/trace/events.txt
+++ b/Documentation/trace/events.txt
@@ -512,3 +512,1558 @@ The following commands are supported:
Note that there can be only one traceon or traceoff trigger per
triggering event.
+
+- hist
+
+ This command aggregates event hits into a hash table keyed on one or
+ more trace event format fields (or stacktrace) and a set of running
+ totals derived from one or more trace event format fields and/or
+ event counts (hitcount).
+
+ The format of a hist trigger is as follows:
+
+ hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>]
+ [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue]
+ [:clear][:name=histname1] [if <filter>]
+
+ When a matching event is hit, an entry is added to a hash table
+ using the key(s) and value(s) named. Keys and values correspond to
+ fields in the event's format description. Values must correspond to
+ numeric fields - on an event hit, the value(s) will be added to a
+ sum kept for that field. The special string 'hitcount' can be used
+ in place of an explicit value field - this is simply a count of
+ event hits. If 'values' isn't specified, an implicit 'hitcount'
+ value will be automatically created and used as the only value.
+ Keys can be any field, or the special string 'stacktrace', which
+ will use the event's kernel stacktrace as the key. The keywords
+ 'keys' or 'key' can be used to specify keys, and the keywords
+ 'values', 'vals', or 'val' can be used to specify values. Compound
+ keys consisting of up to two fields can be specified by the 'keys'
+ keyword. Hashing a compound key produces a unique entry in the
+ table for each unique combination of component keys, and can be
+ useful for providing more fine-grained summaries of event data.
+ Additionally, sort keys consisting of up to two fields can be
+ specified by the 'sort' keyword. If more than one field is
+ specified, the result will be a 'sort within a sort': the first key
+ is taken to be the primary sort key and the second the secondary
+ key. If a hist trigger is given a name using the 'name' parameter,
+ its histogram data will be shared with other triggers of the same
+ name, and trigger hits will update this common data. Only triggers
+ with 'compatible' fields can be combined in this way; triggers are
+ 'compatible' if the fields named in the trigger share the same
+ number and type of fields and those fields also have the same names.
+ Note that any two events always share the compatible 'hitcount' and
+ 'stacktrace' fields and can therefore be combined using those
+ fields, however pointless that may be.
+
+ 'hist' triggers add a 'hist' file to each event's subdirectory.
+ Reading the 'hist' file for the event will dump the hash table in
+ its entirety to stdout. If there are multiple hist triggers
+ attached to an event, there will be a table for each trigger in the
+ output. The table displayed for a named trigger will be the same as
+ any other instance having the same name. Each printed hash table
+ entry is a simple list of the keys and values comprising the entry;
+ keys are printed first and are delineated by curly braces, and are
+ followed by the set of value fields for the entry. By default,
+ numeric fields are displayed as base-10 integers. This can be
+ modified by appending any of the following modifiers to the field
+ name:
+
+ .hex display a number as a hex value
+ .sym display an address as a symbol
+ .sym-offset display an address as a symbol and offset
+ .syscall display a syscall id as a system call name
+ .execname display a common_pid as a program name
+
+ Note that in general the semantics of a given field aren't
+ interpreted when applying a modifier to it, but there are some
+ restrictions to be aware of in this regard:
+
+ - only the 'hex' modifier can be used for values (because values
+ are essentially sums, and the other modifiers don't make sense
+ in that context).
+ - the 'execname' modifier can only be used on a 'common_pid'. The
+ reason for this is that the execname is simply the 'comm' value
+ saved for the 'current' process when an event was triggered,
+ which is the same as the common_pid value saved by the event
+ tracing code. Trying to apply that comm value to other pid
+ values wouldn't be correct, and typically events that care save
+ pid-specific comm fields in the event itself.
+
+ A typical usage scenario would be the following to enable a hist
+ trigger, read its current contents, and then turn it off:
+
+ # echo 'hist:keys=skbaddr.hex:vals=len' > \
+ /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+ # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+ # echo '!hist:keys=skbaddr.hex:vals=len' > \
+ /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+ The trigger file itself can be read to show the details of the
+ currently attached hist trigger. This information is also displayed
+ at the top of the 'hist' file when read.
+
+ By default, the size of the hash table is 2048 entries. The 'size'
+ parameter can be used to specify more or fewer than that. The units
+ are in terms of hashtable entries - if a run uses more entries than
+ specified, the results will show the number of 'drops', the number
+ of hits that were ignored. The size should be a power of 2 between
+ 128 and 131072 (any non-power-of-2 number specified will be rounded
+ up).
+
+ The 'sort' parameter can be used to specify a value field to sort
+ on. The default if unspecified is 'hitcount' and the default sort
+ order is 'ascending'. To sort in the opposite direction, append
+ '.descending' to the sort key.
+
+ The 'pause' parameter can be used to pause an existing hist trigger
+ or to start a hist trigger but not log any events until told to do
+ so. 'continue' or 'cont' can be used to start or restart a paused
+ hist trigger.
+
+ The 'clear' parameter will clear the contents of a running hist
+ trigger and leave its current paused/active state unchanged.
+
+ Note that the 'pause', 'cont', and 'clear' parameters should be
+ applied using the 'append' shell operator ('>>') if applied to an
+ existing trigger, rather than via the '>' operator, which will cause
+ the trigger to be removed through truncation.
+
+- enable_hist/disable_hist
+
+ The enable_hist and disable_hist triggers can be used to have one
+ event conditionally start and stop another event's already-attached
+ hist trigger. Any number of enable_hist and disable_hist triggers
+ can be attached to a given event, allowing that event to kick off
+ and stop aggregations on a host of other events.
+
+ The format is very similar to the enable/disable_event triggers:
+
+ enable_hist:<system>:<event>[:count]
+ disable_hist:<system>:<event>[:count]
+
+ Instead of enabling or disabling the tracing of the target event
+ into the trace buffer as the enable/disable_event triggers do, the
+ enable/disable_hist triggers enable or disable the aggregation of
+ the target event into a hash table.
+
+ A typical usage scenario for the enable_hist/disable_hist triggers
+ would be to first set up a paused hist trigger on some event,
+ followed by an enable_hist/disable_hist pair that turns the hist
+ aggregation on and off when conditions of interest are hit:
+
+ # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \
+ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+ # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
+ /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+
+ # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
+ /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+
+ The above sets up an initially paused hist trigger which is unpaused
+ and starts aggregating events when a given program is executed, and
+ which stops aggregating when the process exits and the hist trigger
+ is paused again.
+
+ The examples below provide a more concrete illustration of the
+ concepts and typical usage patterns discussed above.
+
+
+6.2 'hist' trigger examples
+---------------------------
+
+ The first set of examples creates aggregations using the kmalloc
+ event. The fields that can be used for the hist trigger are listed
+ in the kmalloc event's format file:
+
+ # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format
+ name: kmalloc
+ ID: 374
+ format:
+ field:unsigned short common_type; offset:0; size:2; signed:0;
+ field:unsigned char common_flags; offset:2; size:1; signed:0;
+ field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
+ field:int common_pid; offset:4; size:4; signed:1;
+
+ field:unsigned long call_site; offset:8; size:8; signed:0;
+ field:const void * ptr; offset:16; size:8; signed:0;
+ field:size_t bytes_req; offset:24; size:8; signed:0;
+ field:size_t bytes_alloc; offset:32; size:8; signed:0;
+ field:gfp_t gfp_flags; offset:40; size:4; signed:0;
+
+ We'll start by creating a hist trigger that generates a simple table
+ that lists the total number of bytes requested for each function in
+ the kernel that made one or more calls to kmalloc:
+
+ # echo 'hist:key=call_site:val=bytes_req' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ This tells the tracing system to create a 'hist' trigger using the
+ call_site field of the kmalloc event as the key for the table, which
+ just means that each unique call_site address will have an entry
+ created for it in the table. The 'val=bytes_req' parameter tells
+ the hist trigger that for each unique entry (call_site) in the
+ table, it should keep a running total of the number of bytes
+ requested by that call_site.
+
+ We'll let it run for a while and then dump the contents of the 'hist'
+ file in the kmalloc event's subdirectory (for readability, a number
+ of entries have been omitted):
+
+ # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+ # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+ { call_site: 18446744072106379007 } hitcount: 1 bytes_req: 176
+ { call_site: 18446744071579557049 } hitcount: 1 bytes_req: 1024
+ { call_site: 18446744071580608289 } hitcount: 1 bytes_req: 16384
+ { call_site: 18446744071581827654 } hitcount: 1 bytes_req: 24
+ { call_site: 18446744071580700980 } hitcount: 1 bytes_req: 8
+ { call_site: 18446744071579359876 } hitcount: 1 bytes_req: 152
+ { call_site: 18446744071580795365 } hitcount: 3 bytes_req: 144
+ { call_site: 18446744071581303129 } hitcount: 3 bytes_req: 144
+ { call_site: 18446744071580713234 } hitcount: 4 bytes_req: 2560
+ { call_site: 18446744071580933750 } hitcount: 4 bytes_req: 736
+ .
+ .
+ .
+ { call_site: 18446744072106047046 } hitcount: 69 bytes_req: 5576
+ { call_site: 18446744071582116407 } hitcount: 73 bytes_req: 2336
+ { call_site: 18446744072106054684 } hitcount: 136 bytes_req: 140504
+ { call_site: 18446744072106224230 } hitcount: 136 bytes_req: 19584
+ { call_site: 18446744072106078074 } hitcount: 153 bytes_req: 2448
+ { call_site: 18446744072106062406 } hitcount: 153 bytes_req: 36720
+ { call_site: 18446744071582507929 } hitcount: 153 bytes_req: 37088
+ { call_site: 18446744072102520590 } hitcount: 273 bytes_req: 10920
+ { call_site: 18446744071582143559 } hitcount: 358 bytes_req: 716
+ { call_site: 18446744072106465852 } hitcount: 417 bytes_req: 56712
+ { call_site: 18446744072102523378 } hitcount: 485 bytes_req: 27160
+ { call_site: 18446744072099568646 } hitcount: 1676 bytes_req: 33520
+
+ Totals:
+ Hits: 4610
+ Entries: 45
+ Dropped: 0
+
+ The output displays a line for each entry, beginning with the key
+ specified in the trigger, followed by the value(s) also specified in
+ the trigger. At the beginning of the output is a line that displays
+ the trigger info, which can also be displayed by reading the
+ 'trigger' file:
+
+ # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+ hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+ At the end of the output are a few lines that display the overall
+ totals for the run. The 'Hits' field shows the total number of
+ times the event trigger was hit, the 'Entries' field shows the total
+ number of used entries in the hash table, and the 'Dropped' field
+ shows the number of hits that were dropped because the number of
+ used entries for the run exceeded the maximum number of entries
+ allowed for the table (normally 0, but if not a hint that you may
+ want to increase the size of the table using the 'size' parameter).
+
+ Notice in the above output that there's an extra field, 'hitcount',
+ which wasn't specified in the trigger. Also notice that in the
+ trigger info output, there's a parameter, 'sort=hitcount', which
+ wasn't specified in the trigger either. The reason for that is that
+ every trigger implicitly keeps a count of the total number of hits
+ attributed to a given entry, called the 'hitcount'. That hitcount
+ information is explicitly displayed in the output, and in the
+ absence of a user-specified sort parameter, is used as the default
+ sort field.
+
+ The value 'hitcount' can be used in place of an explicit value in
+ the 'values' parameter if you don't really need to have any
+ particular field summed and are mainly interested in hit
+ frequencies.
+
+ To turn the hist trigger off, simply call up the trigger in the
+ command history and re-execute it with a '!' prepended:
+
+ # echo '!hist:key=call_site:val=bytes_req' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ Finally, notice that the call_site as displayed in the output above
+ isn't really very useful. It's an address, but normally addresses
+ are displayed in hex. To have a numeric field displayed as a hex
+ value, simply append '.hex' to the field name in the trigger:
+
+ # echo 'hist:key=call_site.hex:val=bytes_req' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+ # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+ { call_site: ffffffffa026b291 } hitcount: 1 bytes_req: 433
+ { call_site: ffffffffa07186ff } hitcount: 1 bytes_req: 176
+ { call_site: ffffffff811ae721 } hitcount: 1 bytes_req: 16384
+ { call_site: ffffffff811c5134 } hitcount: 1 bytes_req: 8
+ { call_site: ffffffffa04a9ebb } hitcount: 1 bytes_req: 511
+ { call_site: ffffffff8122e0a6 } hitcount: 1 bytes_req: 12
+ { call_site: ffffffff8107da84 } hitcount: 1 bytes_req: 152
+ { call_site: ffffffff812d8246 } hitcount: 1 bytes_req: 24
+ { call_site: ffffffff811dc1e5 } hitcount: 3 bytes_req: 144
+ { call_site: ffffffffa02515e8 } hitcount: 3 bytes_req: 648
+ { call_site: ffffffff81258159 } hitcount: 3 bytes_req: 144
+ { call_site: ffffffff811c80f4 } hitcount: 4 bytes_req: 544
+ .
+ .
+ .
+ { call_site: ffffffffa06c7646 } hitcount: 106 bytes_req: 8024
+ { call_site: ffffffffa06cb246 } hitcount: 132 bytes_req: 31680
+ { call_site: ffffffffa06cef7a } hitcount: 132 bytes_req: 2112
+ { call_site: ffffffff8137e399 } hitcount: 132 bytes_req: 23232
+ { call_site: ffffffffa06c941c } hitcount: 185 bytes_req: 171360
+ { call_site: ffffffffa06f2a66 } hitcount: 185 bytes_req: 26640
+ { call_site: ffffffffa036a70e } hitcount: 265 bytes_req: 10600
+ { call_site: ffffffff81325447 } hitcount: 292 bytes_req: 584
+ { call_site: ffffffffa072da3c } hitcount: 446 bytes_req: 60656
+ { call_site: ffffffffa036b1f2 } hitcount: 526 bytes_req: 29456
+ { call_site: ffffffffa0099c06 } hitcount: 1780 bytes_req: 35600
+
+ Totals:
+ Hits: 4775
+ Entries: 46
+ Dropped: 0
+
+ Even that's only marginally more useful - while hex values do look
+ more like addresses, what users are typically more interested in
+ when looking at text addresses are the corresponding symbols
+ instead. To have an address displayed as a symbolic value instead,
+ simply append '.sym' or '.sym-offset' to the field name in the
+ trigger:
+
+ # echo 'hist:key=call_site.sym:val=bytes_req' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+ # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+ { call_site: [ffffffff810adcb9] syslog_print_all } hitcount: 1 bytes_req: 1024
+ { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8
+ { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7
+ { call_site: [ffffffff8154acbe] usb_alloc_urb } hitcount: 1 bytes_req: 192
+ { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7
+ { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40
+ { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128
+ { call_site: [ffffffff811febd5] fsnotify_alloc_group } hitcount: 2 bytes_req: 528
+ { call_site: [ffffffff81440f58] __tty_buffer_request_room } hitcount: 2 bytes_req: 2624
+ { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 2 bytes_req: 96
+ { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211] } hitcount: 2 bytes_req: 464
+ { call_site: [ffffffff81672406] tcp_get_metrics } hitcount: 2 bytes_req: 304
+ { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128
+ { call_site: [ffffffff81089b05] sched_create_group } hitcount: 2 bytes_req: 1424
+ .
+ .
+ .
+ { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1185 bytes_req: 123240
+ { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm] } hitcount: 1185 bytes_req: 104280
+ { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 1402 bytes_req: 190672
+ { call_site: [ffffffff812891ca] ext4_find_extent } hitcount: 1518 bytes_req: 146208
+ { call_site: [ffffffffa029070e] drm_vma_node_allow [drm] } hitcount: 1746 bytes_req: 69840
+ { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 2021 bytes_req: 792312
+ { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 2592 bytes_req: 145152
+ { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 2629 bytes_req: 378576
+ { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 2629 bytes_req: 3783248
+ { call_site: [ffffffff81325607] apparmor_file_alloc_security } hitcount: 5192 bytes_req: 10384
+ { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 5529 bytes_req: 110584
+ { call_site: [ffffffff8131ebf7] aa_alloc_task_context } hitcount: 21943 bytes_req: 702176
+ { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 55759 bytes_req: 5074265
+
+ Totals:
+ Hits: 109928
+ Entries: 71
+ Dropped: 0
+
+ Because the default sort key above is 'hitcount', the above shows
+ the list of call_sites by increasing hitcount, so that at the bottom
+ we see the functions that made the most kmalloc calls during the
+ run. If instead we wanted to see the top kmalloc callers in
+ terms of the number of bytes requested rather than the number of
+ calls, and we wanted the top caller to appear at the top, we can use
+ the 'sort' parameter, along with the 'descending' modifier:
+
+ # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+ # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
+
+ { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 2186 bytes_req: 3397464
+ { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1790 bytes_req: 712176
+ { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 8132 bytes_req: 513135
+ { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 106 bytes_req: 440128
+ { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 2186 bytes_req: 314784
+ { call_site: [ffffffff812891ca] ext4_find_extent } hitcount: 2174 bytes_req: 208992
+ { call_site: [ffffffff811ae8e1] __kmalloc } hitcount: 8 bytes_req: 131072
+ { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 859 bytes_req: 116824
+ { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 1834 bytes_req: 102704
+ { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 972 bytes_req: 101088
+ { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm] } hitcount: 972 bytes_req: 85536
+ { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 3333 bytes_req: 66664
+ { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 209 bytes_req: 61632
+ .
+ .
+ .
+ { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128
+ { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128
+ { call_site: [ffffffff812d8406] copy_semundo } hitcount: 2 bytes_req: 48
+ { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 1 bytes_req: 48
+ { call_site: [ffffffffa027121a] drm_getmagic [drm] } hitcount: 1 bytes_req: 48
+ { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40
+ { call_site: [ffffffff811c52f4] bprm_change_interp } hitcount: 2 bytes_req: 16
+ { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8
+ { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7
+ { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7
+
+ Totals:
+ Hits: 32133
+ Entries: 81
+ Dropped: 0
+
+ To display the offset and size information in addition to the symbol
+ name, just use 'sym-offset' instead:
+
+ # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+ # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
+
+ { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915] } hitcount: 4569 bytes_req: 3163720
+ { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915] } hitcount: 4569 bytes_req: 657936
+ { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915] } hitcount: 1519 bytes_req: 472936
+ { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915] } hitcount: 3050 bytes_req: 211832
+ { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50 } hitcount: 34 bytes_req: 148384
+ { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915] } hitcount: 1385 bytes_req: 144040
+ { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0 } hitcount: 8 bytes_req: 131072
+ { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm] } hitcount: 1385 bytes_req: 121880
+ { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm] } hitcount: 1848 bytes_req: 103488
+ { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915] } hitcount: 461 bytes_req: 62696
+ { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm] } hitcount: 1541 bytes_req: 61640
+ { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0 } hitcount: 57 bytes_req: 57456
+ .
+ .
+ .
+ { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0 } hitcount: 2 bytes_req: 128
+ { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm] } hitcount: 3 bytes_req: 96
+ { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0 } hitcount: 8 bytes_req: 96
+ { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650 } hitcount: 3 bytes_req: 84
+ { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110 } hitcount: 1 bytes_req: 8
+ { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid] } hitcount: 1 bytes_req: 7
+ { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid] } hitcount: 1 bytes_req: 7
+
+ Totals:
+ Hits: 26098
+ Entries: 64
+ Dropped: 0
+
+ We can also add multiple fields to the 'values' parameter. For
+ example, we might want to see the total number of bytes allocated
+ alongside bytes requested, and display the result sorted by bytes
+ allocated in a descending order:
+
+ # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+ # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active]
+
+ { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 7403 bytes_req: 4084360 bytes_alloc: 5958016
+ { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 541 bytes_req: 2213968 bytes_alloc: 2228224
+ { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 7404 bytes_req: 1066176 bytes_alloc: 1421568
+ { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1565 bytes_req: 557368 bytes_alloc: 1037760
+ { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 9557 bytes_req: 595778 bytes_alloc: 695744
+ { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 5839 bytes_req: 430680 bytes_alloc: 470400
+ { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 2388 bytes_req: 324768 bytes_alloc: 458496
+ { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 3911 bytes_req: 219016 bytes_alloc: 250304
+ { call_site: [ffffffff815f8d7b] sk_prot_alloc } hitcount: 235 bytes_req: 236880 bytes_alloc: 240640
+ { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 557 bytes_req: 169024 bytes_alloc: 221760
+ { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 9378 bytes_req: 187548 bytes_alloc: 206312
+ { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1519 bytes_req: 157976 bytes_alloc: 194432
+ .
+ .
+ .
+ { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach } hitcount: 2 bytes_req: 144 bytes_alloc: 192
+ { call_site: [ffffffff81097ee8] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128
+ { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128
+ { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128
+ { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128
+ { call_site: [ffffffff81213e80] load_elf_binary } hitcount: 3 bytes_req: 84 bytes_alloc: 96
+ { call_site: [ffffffff81079a2e] kthread_create_on_node } hitcount: 1 bytes_req: 56 bytes_alloc: 64
+ { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8
+ { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 bytes_alloc: 8
+ { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8
+
+ Totals:
+ Hits: 66598
+ Entries: 65
+ Dropped: 0
+
+ Finally, to finish off our kmalloc example, instead of simply having
+ the hist trigger display symbolic call_sites, we can have the hist
+ trigger additionally display the complete set of kernel stack traces
+ that led to each call_site. To do that, we simply use the special
+ value 'stacktrace' for the key parameter:
+
+ # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ The above trigger will use the kernel stack trace in effect when an
+ event is triggered as the key for the hash table. This allows the
+ enumeration of every kernel callpath that led up to a particular
+ event, along with a running total of any of the event fields for
+ that event. Here we tally bytes requested and bytes allocated for
+ every callpath in the system that led up to a kmalloc (in this case
+ every callpath to a kmalloc for a kernel compile):
+
+ # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+ # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active]
+
+ { stacktrace:
+ __kmalloc_track_caller+0x10b/0x1a0
+ kmemdup+0x20/0x50
+ hidraw_report_event+0x8a/0x120 [hid]
+ hid_report_raw_event+0x3ea/0x440 [hid]
+ hid_input_report+0x112/0x190 [hid]
+ hid_irq_in+0xc2/0x260 [usbhid]
+ __usb_hcd_giveback_urb+0x72/0x120
+ usb_giveback_urb_bh+0x9e/0xe0
+ tasklet_hi_action+0xf8/0x100
+ __do_softirq+0x114/0x2c0
+ irq_exit+0xa5/0xb0
+ do_IRQ+0x5a/0xf0
+ ret_from_intr+0x0/0x30
+ cpuidle_enter+0x17/0x20
+ cpu_startup_entry+0x315/0x3e0
+ rest_init+0x7c/0x80
+ } hitcount: 3 bytes_req: 21 bytes_alloc: 24
+ { stacktrace:
+ __kmalloc_track_caller+0x10b/0x1a0
+ kmemdup+0x20/0x50
+ hidraw_report_event+0x8a/0x120 [hid]
+ hid_report_raw_event+0x3ea/0x440 [hid]
+ hid_input_report+0x112/0x190 [hid]
+ hid_irq_in+0xc2/0x260 [usbhid]
+ __usb_hcd_giveback_urb+0x72/0x120
+ usb_giveback_urb_bh+0x9e/0xe0
+ tasklet_hi_action+0xf8/0x100
+ __do_softirq+0x114/0x2c0
+ irq_exit+0xa5/0xb0
+ do_IRQ+0x5a/0xf0
+ ret_from_intr+0x0/0x30
+ } hitcount: 3 bytes_req: 21 bytes_alloc: 24
+ { stacktrace:
+ kmem_cache_alloc_trace+0xeb/0x150
+ aa_alloc_task_context+0x27/0x40
+ apparmor_cred_prepare+0x1f/0x50
+ security_prepare_creds+0x16/0x20
+ prepare_creds+0xdf/0x1a0
+ SyS_capset+0xb5/0x200
+ system_call_fastpath+0x12/0x6a
+ } hitcount: 1 bytes_req: 32 bytes_alloc: 32
+ .
+ .
+ .
+ { stacktrace:
+ __kmalloc+0x11b/0x1b0
+ i915_gem_execbuffer2+0x6c/0x2c0 [i915]
+ drm_ioctl+0x349/0x670 [drm]
+ do_vfs_ioctl+0x2f0/0x4f0
+ SyS_ioctl+0x81/0xa0
+ system_call_fastpath+0x12/0x6a
+ } hitcount: 17726 bytes_req: 13944120 bytes_alloc: 19593808
+ { stacktrace:
+ __kmalloc+0x11b/0x1b0
+ load_elf_phdrs+0x76/0xa0
+ load_elf_binary+0x102/0x1650
+ search_binary_handler+0x97/0x1d0
+ do_execveat_common.isra.34+0x551/0x6e0
+ SyS_execve+0x3a/0x50
+ return_from_execve+0x0/0x23
+ } hitcount: 33348 bytes_req: 17152128 bytes_alloc: 20226048
+ { stacktrace:
+ kmem_cache_alloc_trace+0xeb/0x150
+ apparmor_file_alloc_security+0x27/0x40
+ security_file_alloc+0x16/0x20
+ get_empty_filp+0x93/0x1c0
+ path_openat+0x31/0x5f0
+ do_filp_open+0x3a/0x90
+ do_sys_open+0x128/0x220
+ SyS_open+0x1e/0x20
+ system_call_fastpath+0x12/0x6a
+ } hitcount: 4766422 bytes_req: 9532844 bytes_alloc: 38131376
+ { stacktrace:
+ __kmalloc+0x11b/0x1b0
+ seq_buf_alloc+0x1b/0x50
+ seq_read+0x2cc/0x370
+ proc_reg_read+0x3d/0x80
+ __vfs_read+0x28/0xe0
+ vfs_read+0x86/0x140
+ SyS_read+0x46/0xb0
+ system_call_fastpath+0x12/0x6a
+ } hitcount: 19133 bytes_req: 78368768 bytes_alloc: 78368768
+
+ Totals:
+ Hits: 6085872
+ Entries: 253
+ Dropped: 0
+
+ If you key a hist trigger on common_pid, in order for example to
+ gather and display sorted totals for each process, you can use the
+ special .execname modifier to display the executable names for the
+ processes in the table rather than raw pids. The example below
+ keeps a per-process sum of total bytes read:
+
+ # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \
+ /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
+
+ # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist
+ # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active]
+
+ { common_pid: gnome-terminal [ 3196] } hitcount: 280 count: 1093512
+ { common_pid: Xorg [ 1309] } hitcount: 525 count: 256640
+ { common_pid: compiz [ 2889] } hitcount: 59 count: 254400
+ { common_pid: bash [ 8710] } hitcount: 3 count: 66369
+ { common_pid: dbus-daemon-lau [ 8703] } hitcount: 49 count: 47739
+ { common_pid: irqbalance [ 1252] } hitcount: 27 count: 27648
+ { common_pid: 01ifupdown [ 8705] } hitcount: 3 count: 17216
+ { common_pid: dbus-daemon [ 772] } hitcount: 10 count: 12396
+ { common_pid: Socket Thread [ 8342] } hitcount: 11 count: 11264
+ { common_pid: nm-dhcp-client. [ 8701] } hitcount: 6 count: 7424
+ { common_pid: gmain [ 1315] } hitcount: 18 count: 6336
+ .
+ .
+ .
+ { common_pid: postgres [ 1892] } hitcount: 2 count: 32
+ { common_pid: postgres [ 1891] } hitcount: 2 count: 32
+ { common_pid: gmain [ 8704] } hitcount: 2 count: 32
+ { common_pid: upstart-dbus-br [ 2740] } hitcount: 21 count: 21
+ { common_pid: nm-dispatcher.a [ 8696] } hitcount: 1 count: 16
+ { common_pid: indicator-datet [ 2904] } hitcount: 1 count: 16
+ { common_pid: gdbus [ 2998] } hitcount: 1 count: 16
+ { common_pid: rtkit-daemon [ 2052] } hitcount: 1 count: 8
+ { common_pid: init [ 1] } hitcount: 2 count: 2
+
+ Totals:
+ Hits: 2116
+ Entries: 51
+ Dropped: 0
+
+ Similarly, if you key a hist trigger on syscall id, for example to
+ gather and display a list of systemwide syscall hits, you can use
+ the special .syscall modifier to display the syscall names rather
+ than raw ids. The example below keeps a running total of syscall
+ counts for the system during the run:
+
+ # echo 'hist:key=id.syscall:val=hitcount' > \
+ /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+
+ # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+ # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active]
+
+ { id: sys_fsync [ 74] } hitcount: 1
+ { id: sys_newuname [ 63] } hitcount: 1
+ { id: sys_prctl [157] } hitcount: 1
+ { id: sys_statfs [137] } hitcount: 1
+ { id: sys_symlink [ 88] } hitcount: 1
+ { id: sys_sendmmsg [307] } hitcount: 1
+ { id: sys_semctl [ 66] } hitcount: 1
+ { id: sys_readlink [ 89] } hitcount: 3
+ { id: sys_bind [ 49] } hitcount: 3
+ { id: sys_getsockname [ 51] } hitcount: 3
+ { id: sys_unlink [ 87] } hitcount: 3
+ { id: sys_rename [ 82] } hitcount: 4
+ { id: unknown_syscall [ 58] } hitcount: 4
+ { id: sys_connect [ 42] } hitcount: 4
+ { id: sys_getpid [ 39] } hitcount: 4
+ .
+ .
+ .
+ { id: sys_rt_sigprocmask [ 14] } hitcount: 952
+ { id: sys_futex [202] } hitcount: 1534
+ { id: sys_write [ 1] } hitcount: 2689
+ { id: sys_setitimer [ 38] } hitcount: 2797
+ { id: sys_read [ 0] } hitcount: 3202
+ { id: sys_select [ 23] } hitcount: 3773
+ { id: sys_writev [ 20] } hitcount: 4531
+ { id: sys_poll [ 7] } hitcount: 8314
+ { id: sys_recvmsg [ 47] } hitcount: 13738
+ { id: sys_ioctl [ 16] } hitcount: 21843
+
+ Totals:
+ Hits: 67612
+ Entries: 72
+ Dropped: 0
+
+ The syscall counts above provide a rough overall picture of system
+ call activity on the system; we can see for example that the most
+ popular system call on this system was the 'sys_ioctl' system call.
+
+ We can use 'compound' keys to refine that number and provide some
+ further insight as to which processes exactly contribute to the
+ overall ioctl count.
+
+ The command below keeps a hitcount for every unique combination of
+ system call id and pid - the end result is essentially a table
+ that keeps a per-pid sum of system call hits. The results are
+ sorted using the system call id as the primary key, and the
+ hitcount sum as the secondary key:
+
+ # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \
+ /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+
+ # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+ # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active]
+
+ { id: sys_read [ 0], common_pid: rtkit-daemon [ 1877] } hitcount: 1
+ { id: sys_read [ 0], common_pid: gdbus [ 2976] } hitcount: 1
+ { id: sys_read [ 0], common_pid: console-kit-dae [ 3400] } hitcount: 1
+ { id: sys_read [ 0], common_pid: postgres [ 1865] } hitcount: 1
+ { id: sys_read [ 0], common_pid: deja-dup-monito [ 3543] } hitcount: 2
+ { id: sys_read [ 0], common_pid: NetworkManager [ 890] } hitcount: 2
+ { id: sys_read [ 0], common_pid: evolution-calen [ 3048] } hitcount: 2
+ { id: sys_read [ 0], common_pid: postgres [ 1864] } hitcount: 2
+ { id: sys_read [ 0], common_pid: nm-applet [ 3022] } hitcount: 2
+ { id: sys_read [ 0], common_pid: whoopsie [ 1212] } hitcount: 2
+ .
+ .
+ .
+ { id: sys_ioctl [ 16], common_pid: bash [ 8479] } hitcount: 1
+ { id: sys_ioctl [ 16], common_pid: bash [ 3472] } hitcount: 12
+ { id: sys_ioctl [ 16], common_pid: gnome-terminal [ 3199] } hitcount: 16
+ { id: sys_ioctl [ 16], common_pid: Xorg [ 1267] } hitcount: 1808
+ { id: sys_ioctl [ 16], common_pid: compiz [ 2994] } hitcount: 5580
+ .
+ .
+ .
+ { id: sys_waitid [247], common_pid: upstart-dbus-br [ 2690] } hitcount: 3
+ { id: sys_waitid [247], common_pid: upstart-dbus-br [ 2688] } hitcount: 16
+ { id: sys_inotify_add_watch [254], common_pid: gmain [ 975] } hitcount: 2
+ { id: sys_inotify_add_watch [254], common_pid: gmain [ 3204] } hitcount: 4
+ { id: sys_inotify_add_watch [254], common_pid: gmain [ 2888] } hitcount: 4
+ { id: sys_inotify_add_watch [254], common_pid: gmain [ 3003] } hitcount: 4
+ { id: sys_inotify_add_watch [254], common_pid: gmain [ 2873] } hitcount: 4
+ { id: sys_inotify_add_watch [254], common_pid: gmain [ 3196] } hitcount: 6
+ { id: sys_openat [257], common_pid: java [ 2623] } hitcount: 2
+ { id: sys_eventfd2 [290], common_pid: ibus-ui-gtk3 [ 2760] } hitcount: 4
+ { id: sys_eventfd2 [290], common_pid: compiz [ 2994] } hitcount: 6
+
+ Totals:
+ Hits: 31536
+ Entries: 323
+ Dropped: 0
+
+ The above list does give us a breakdown of the ioctl syscall by
+ pid, but it also gives us quite a bit more than that, which we
+ don't really care about at the moment. Since we know the syscall
+ id for sys_ioctl (16, displayed next to the sys_ioctl name), we
+ can use that to filter out all the other syscalls:
+
+ # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \
+ /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+
+ # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+ # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active]
+
+ { id: sys_ioctl [ 16], common_pid: gmain [ 2769] } hitcount: 1
+ { id: sys_ioctl [ 16], common_pid: evolution-addre [ 8571] } hitcount: 1
+ { id: sys_ioctl [ 16], common_pid: gmain [ 3003] } hitcount: 1
+ { id: sys_ioctl [ 16], common_pid: gmain [ 2781] } hitcount: 1
+ { id: sys_ioctl [ 16], common_pid: gmain [ 2829] } hitcount: 1
+ { id: sys_ioctl [ 16], common_pid: bash [ 8726] } hitcount: 1
+ { id: sys_ioctl [ 16], common_pid: bash [ 8508] } hitcount: 1
+ { id: sys_ioctl [ 16], common_pid: gmain [ 2970] } hitcount: 1
+ { id: sys_ioctl [ 16], common_pid: gmain [ 2768] } hitcount: 1
+ .
+ .
+ .
+ { id: sys_ioctl [ 16], common_pid: pool [ 8559] } hitcount: 45
+ { id: sys_ioctl [ 16], common_pid: pool [ 8555] } hitcount: 48
+ { id: sys_ioctl [ 16], common_pid: pool [ 8551] } hitcount: 48
+ { id: sys_ioctl [ 16], common_pid: avahi-daemon [ 896] } hitcount: 66
+ { id: sys_ioctl [ 16], common_pid: Xorg [ 1267] } hitcount: 26674
+ { id: sys_ioctl [ 16], common_pid: compiz [ 2994] } hitcount: 73443
+
+ Totals:
+ Hits: 101162
+ Entries: 103
+ Dropped: 0
+
+ The above output shows that 'compiz' and 'Xorg' are far and away
+ the heaviest ioctl callers (which might lead to questions about
+ whether they really need to be making all those calls and to
+ possible avenues for further investigation.)
+
+ The compound key examples used a key and a sum value (hitcount) to
+ sort the output, but we can just as easily use two keys instead.
+ Here's an example where we use a compound key composed of the
+ common_pid and size event fields. Sorting with pid as the primary
+ key and 'size' as the secondary key allows us to display an
+ ordered summary of the recvfrom sizes, with counts, received by
+ each process:
+
+ # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \
+ /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger
+
+ # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist
+ # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active]
+
+ { common_pid: smbd [ 784], size: 4 } hitcount: 1
+ { common_pid: dnsmasq [ 1412], size: 4096 } hitcount: 672
+ { common_pid: postgres [ 1796], size: 1000 } hitcount: 6
+ { common_pid: postgres [ 1867], size: 1000 } hitcount: 10
+ { common_pid: bamfdaemon [ 2787], size: 28 } hitcount: 2
+ { common_pid: bamfdaemon [ 2787], size: 14360 } hitcount: 1
+ { common_pid: compiz [ 2994], size: 8 } hitcount: 1
+ { common_pid: compiz [ 2994], size: 20 } hitcount: 11
+ { common_pid: gnome-terminal [ 3199], size: 4 } hitcount: 2
+ { common_pid: firefox [ 8817], size: 4 } hitcount: 1
+ { common_pid: firefox [ 8817], size: 8 } hitcount: 5
+ { common_pid: firefox [ 8817], size: 588 } hitcount: 2
+ { common_pid: firefox [ 8817], size: 628 } hitcount: 1
+ { common_pid: firefox [ 8817], size: 6944 } hitcount: 1
+ { common_pid: firefox [ 8817], size: 408880 } hitcount: 2
+ { common_pid: firefox [ 8822], size: 8 } hitcount: 2
+ { common_pid: firefox [ 8822], size: 160 } hitcount: 2
+ { common_pid: firefox [ 8822], size: 320 } hitcount: 2
+ { common_pid: firefox [ 8822], size: 352 } hitcount: 1
+ .
+ .
+ .
+ { common_pid: pool [ 8923], size: 1960 } hitcount: 10
+ { common_pid: pool [ 8923], size: 2048 } hitcount: 10
+ { common_pid: pool [ 8924], size: 1960 } hitcount: 10
+ { common_pid: pool [ 8924], size: 2048 } hitcount: 10
+ { common_pid: pool [ 8928], size: 1964 } hitcount: 4
+ { common_pid: pool [ 8928], size: 1965 } hitcount: 2
+ { common_pid: pool [ 8928], size: 2048 } hitcount: 6
+ { common_pid: pool [ 8929], size: 1982 } hitcount: 1
+ { common_pid: pool [ 8929], size: 2048 } hitcount: 1
+
+ Totals:
+ Hits: 2016
+ Entries: 224
+ Dropped: 0
+
+ The above example also illustrates the fact that although a compound
+ key is treated as a single entity for hashing purposes, the sub-keys
+ it's composed of can be accessed independently.
+
+ The next example uses a string field as the hash key and
+ demonstrates how you can manually pause and continue a hist trigger.
+ In this example, we'll aggregate fork counts and don't expect a
+ large number of entries in the hash table, so we'll drop its size
+ to a much smaller number, say 256:
+
+ # echo 'hist:key=child_comm:val=hitcount:size=256' > \
+ /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+
+ # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+ # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
+
+ { child_comm: dconf worker } hitcount: 1
+ { child_comm: ibus-daemon } hitcount: 1
+ { child_comm: whoopsie } hitcount: 1
+ { child_comm: smbd } hitcount: 1
+ { child_comm: gdbus } hitcount: 1
+ { child_comm: kthreadd } hitcount: 1
+ { child_comm: dconf worker } hitcount: 1
+ { child_comm: evolution-alarm } hitcount: 2
+ { child_comm: Socket Thread } hitcount: 2
+ { child_comm: postgres } hitcount: 2
+ { child_comm: bash } hitcount: 3
+ { child_comm: compiz } hitcount: 3
+ { child_comm: evolution-sourc } hitcount: 4
+ { child_comm: dhclient } hitcount: 4
+ { child_comm: pool } hitcount: 5
+ { child_comm: nm-dispatcher.a } hitcount: 8
+ { child_comm: firefox } hitcount: 8
+ { child_comm: dbus-daemon } hitcount: 8
+ { child_comm: glib-pacrunner } hitcount: 10
+ { child_comm: evolution } hitcount: 23
+
+ Totals:
+ Hits: 89
+ Entries: 20
+ Dropped: 0
+
+ If we want to pause the hist trigger, we can simply append :pause to
+ the command that started the trigger. Notice that the trigger info
+ displays as [paused]:
+
+ # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \
+ /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+
+ # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+ # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused]
+
+ { child_comm: dconf worker } hitcount: 1
+ { child_comm: kthreadd } hitcount: 1
+ { child_comm: dconf worker } hitcount: 1
+ { child_comm: gdbus } hitcount: 1
+ { child_comm: ibus-daemon } hitcount: 1
+ { child_comm: Socket Thread } hitcount: 2
+ { child_comm: evolution-alarm } hitcount: 2
+ { child_comm: smbd } hitcount: 2
+ { child_comm: bash } hitcount: 3
+ { child_comm: whoopsie } hitcount: 3
+ { child_comm: compiz } hitcount: 3
+ { child_comm: evolution-sourc } hitcount: 4
+ { child_comm: pool } hitcount: 5
+ { child_comm: postgres } hitcount: 6
+ { child_comm: firefox } hitcount: 8
+ { child_comm: dhclient } hitcount: 10
+ { child_comm: emacs } hitcount: 12
+ { child_comm: dbus-daemon } hitcount: 20
+ { child_comm: nm-dispatcher.a } hitcount: 20
+ { child_comm: evolution } hitcount: 35
+ { child_comm: glib-pacrunner } hitcount: 59
+
+ Totals:
+ Hits: 199
+ Entries: 21
+ Dropped: 0
+
+ To manually continue having the trigger aggregate events, append
+ :cont instead. Notice that the trigger info displays as [active]
+ again, and the data has changed:
+
+ # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \
+ /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+
+ # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+ # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
+
+ { child_comm: dconf worker } hitcount: 1
+ { child_comm: dconf worker } hitcount: 1
+ { child_comm: kthreadd } hitcount: 1
+ { child_comm: gdbus } hitcount: 1
+ { child_comm: ibus-daemon } hitcount: 1
+ { child_comm: Socket Thread } hitcount: 2
+ { child_comm: evolution-alarm } hitcount: 2
+ { child_comm: smbd } hitcount: 2
+ { child_comm: whoopsie } hitcount: 3
+ { child_comm: compiz } hitcount: 3
+ { child_comm: evolution-sourc } hitcount: 4
+ { child_comm: bash } hitcount: 5
+ { child_comm: pool } hitcount: 5
+ { child_comm: postgres } hitcount: 6
+ { child_comm: firefox } hitcount: 8
+ { child_comm: dhclient } hitcount: 11
+ { child_comm: emacs } hitcount: 12
+ { child_comm: dbus-daemon } hitcount: 22
+ { child_comm: nm-dispatcher.a } hitcount: 22
+ { child_comm: evolution } hitcount: 35
+ { child_comm: glib-pacrunner } hitcount: 59
+
+ Totals:
+ Hits: 206
+ Entries: 21
+ Dropped: 0
+
+ The previous example showed how to pause and continue a hist trigger
+ by appending 'pause' and 'cont' to the hist trigger command. A
+ hist trigger can also be started in a paused state by initially
+ starting the trigger with ':pause' appended. This allows you to
+ start the trigger only when you're ready to start collecting data
+ and not before. For example, you could start the trigger in a
+ paused state, then unpause it and do something you want to measure,
+ then pause the trigger again when done.
+
+ Of course, doing this manually can be difficult and error-prone, but
+ it is possible to automatically start and stop a hist trigger based
+ on some condition, via the enable_hist and disable_hist triggers.
+
+ For example, suppose we wanted to take a look at the relative
+ weights in terms of skb length for each callpath that leads to a
+ netif_receive_skb event when downloading a decent-sized file using
+ wget.
+
+ First we set up an initially paused stacktrace trigger on the
+ netif_receive_skb event:
+
+ # echo 'hist:key=stacktrace:vals=len:pause' > \
+ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+ Next, we set up an 'enable_hist' trigger on the sched_process_exec
+ event, with an 'if filename==/usr/bin/wget' filter. The effect of
+ this new trigger is that it will 'unpause' the hist trigger we just
+ set up on netif_receive_skb if and only if it sees a
+ sched_process_exec event with a filename of '/usr/bin/wget'. When
+ that happens, all netif_receive_skb events are aggregated into a
+ hash table keyed on stacktrace:
+
+ # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
+ /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+
+ The aggregation continues until the netif_receive_skb hist trigger
+ is paused again, which is what the following disable_hist trigger
+ does by creating a similar setup on the sched_process_exit event,
+ using the filter 'comm==wget':
+
+ # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
+ /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+
+ Whenever a process exits and its comm field matches the
+ disable_hist trigger filter 'comm==wget', the netif_receive_skb
+ hist trigger is disabled.
+
+ The overall effect is that netif_receive_skb events are aggregated
+ into the hash table for only the duration of the wget. Executing a
+ wget command and then listing the 'hist' file will display the
+ output generated by the wget command:
+
+ $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
+
+ # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
+ # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
+
+ { stacktrace:
+ __netif_receive_skb_core+0x46d/0x990
+ __netif_receive_skb+0x18/0x60
+ netif_receive_skb_internal+0x23/0x90
+ napi_gro_receive+0xc8/0x100
+ ieee80211_deliver_skb+0xd6/0x270 [mac80211]
+ ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
+ ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
+ ieee80211_rx+0x31d/0x900 [mac80211]
+ iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
+ iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
+ iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
+ irq_thread_fn+0x20/0x50
+ irq_thread+0x11f/0x150
+ kthread+0xd2/0xf0
+ ret_from_fork+0x42/0x70
+ } hitcount: 85 len: 28884
+ { stacktrace:
+ __netif_receive_skb_core+0x46d/0x990
+ __netif_receive_skb+0x18/0x60
+ netif_receive_skb_internal+0x23/0x90
+ napi_gro_complete+0xa4/0xe0
+ dev_gro_receive+0x23a/0x360
+ napi_gro_receive+0x30/0x100
+ ieee80211_deliver_skb+0xd6/0x270 [mac80211]
+ ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
+ ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
+ ieee80211_rx+0x31d/0x900 [mac80211]
+ iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
+ iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
+ iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
+ irq_thread_fn+0x20/0x50
+ irq_thread+0x11f/0x150
+ kthread+0xd2/0xf0
+ } hitcount: 98 len: 664329
+ { stacktrace:
+ __netif_receive_skb_core+0x46d/0x990
+ __netif_receive_skb+0x18/0x60
+ process_backlog+0xa8/0x150
+ net_rx_action+0x15d/0x340
+ __do_softirq+0x114/0x2c0
+ do_softirq_own_stack+0x1c/0x30
+ do_softirq+0x65/0x70
+ __local_bh_enable_ip+0xb5/0xc0
+ ip_finish_output+0x1f4/0x840
+ ip_output+0x6b/0xc0
+ ip_local_out_sk+0x31/0x40
+ ip_send_skb+0x1a/0x50
+ udp_send_skb+0x173/0x2a0
+ udp_sendmsg+0x2bf/0x9f0
+ inet_sendmsg+0x64/0xa0
+ sock_sendmsg+0x3d/0x50
+ } hitcount: 115 len: 13030
+ { stacktrace:
+ __netif_receive_skb_core+0x46d/0x990
+ __netif_receive_skb+0x18/0x60
+ netif_receive_skb_internal+0x23/0x90
+ napi_gro_complete+0xa4/0xe0
+ napi_gro_flush+0x6d/0x90
+ iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi]
+ irq_thread_fn+0x20/0x50
+ irq_thread+0x11f/0x150
+ kthread+0xd2/0xf0
+ ret_from_fork+0x42/0x70
+ } hitcount: 934 len: 5512212
+
+ Totals:
+ Hits: 1232
+ Entries: 4
+ Dropped: 0
+
+ The above shows all the netif_receive_skb callpaths and their total
+ lengths for the duration of the wget command.
+
+ The 'clear' hist trigger param can be used to clear the hash table.
+ Suppose we wanted to try another run of the previous example but
+ this time also wanted to see the complete list of events that went
+ into the histogram. In order to avoid having to set everything up
+ again, we can just clear the histogram first:
+
+ # echo 'hist:key=stacktrace:vals=len:clear' >> \
+ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+ Just to verify that it is in fact cleared, here's what we now see in
+ the hist file:
+
+ # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
+ # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
+
+ Totals:
+ Hits: 0
+ Entries: 0
+ Dropped: 0
+
+ Since we want to see the detailed list of every netif_receive_skb
+ event occurring during the new run, which are in fact the same
+ events being aggregated into the hash table, we add some additional
+ 'enable_event' and 'disable_event' triggers to the triggering
+ sched_process_exec and sched_process_exit events as such:
+
+ # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \
+ /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+
+ # echo 'disable_event:net:netif_receive_skb if comm==wget' > \
+ /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+
+ If you read the trigger files for the sched_process_exec and
+ sched_process_exit triggers, you should see two triggers for each:
+ one enabling/disabling the hist aggregation and the other
+ enabling/disabling the logging of events:
+
+ # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+ enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
+ enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
+
+ # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+ disable_event:net:netif_receive_skb:unlimited if comm==wget
+ disable_hist:net:netif_receive_skb:unlimited if comm==wget
+
+ In other words, whenever either of the sched_process_exec or
+ sched_process_exit events is hit and matches 'wget', it enables or
+ disables both the histogram and the event log, and what you end up
+ with is a hash table and set of events just covering the specified
+ duration. Run the wget command again:
+
+ $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
+
+ Displaying the 'hist' file should show something similar to what you
+ saw in the last run, but this time you should also see the
+ individual events in the trace file:
+
+ # cat /sys/kernel/debug/tracing/trace
+
+ # tracer: nop
+ #
+ # entries-in-buffer/entries-written: 183/1426 #P:4
+ #
+ # _-----=> irqs-off
+ # / _----=> need-resched
+ # | / _---=> hardirq/softirq
+ # || / _--=> preempt-depth
+ # ||| / delay
+ # TASK-PID CPU# |||| TIMESTAMP FUNCTION
+ # | | | |||| | |
+ wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60
+ wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60
+ dnsmasq-1382 [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130
+ dnsmasq-1382 [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138
+ ##### CPU 2 buffer started ####
+ irq/29-iwlwifi-559 [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948
+ irq/29-iwlwifi-559 [002] ..s. 31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500
+ irq/29-iwlwifi-559 [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948
+ irq/29-iwlwifi-559 [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948
+ irq/29-iwlwifi-559 [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500
+ .
+ .
+ .
+
+ The following example demonstrates how multiple hist triggers can be
+ attached to a given event. This capability can be useful for
+ creating a set of different summaries derived from the same set of
+ events, or for comparing the effects of different filters, among
+ other things.
+
+ # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \
+ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+ # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \
+ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+ # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \
+ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+ # echo 'hist:keys=skbaddr.hex:vals=len' >> \
+ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+ # echo 'hist:keys=len:vals=common_preempt_count' >> \
+ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+ The above set of commands creates four triggers differing only in
+ their filters, along with a completely different though fairly
+ nonsensical trigger. Note that in order to append multiple hist
+ triggers to the same file, you should use the '>>' operator to
+ append them ('>' will also add the new hist trigger, but will remove
+ any existing hist triggers beforehand).
+
+ Displaying the contents of the 'hist' file for the event shows the
+ contents of all five histograms:
+
+ # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
+
+ # event histogram
+ #
+ # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active]
+ #
+
+ { len: 176 } hitcount: 1 common_preempt_count: 0
+ { len: 223 } hitcount: 1 common_preempt_count: 0
+ { len: 4854 } hitcount: 1 common_preempt_count: 0
+ { len: 395 } hitcount: 1 common_preempt_count: 0
+ { len: 177 } hitcount: 1 common_preempt_count: 0
+ { len: 446 } hitcount: 1 common_preempt_count: 0
+ { len: 1601 } hitcount: 1 common_preempt_count: 0
+ .
+ .
+ .
+ { len: 1280 } hitcount: 66 common_preempt_count: 0
+ { len: 116 } hitcount: 81 common_preempt_count: 40
+ { len: 708 } hitcount: 112 common_preempt_count: 0
+ { len: 46 } hitcount: 221 common_preempt_count: 0
+ { len: 1264 } hitcount: 458 common_preempt_count: 0
+
+ Totals:
+ Hits: 1428
+ Entries: 147
+ Dropped: 0
+
+
+ # event histogram
+ #
+ # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
+ #
+
+ { skbaddr: ffff8800baee5e00 } hitcount: 1 len: 130
+ { skbaddr: ffff88005f3d5600 } hitcount: 1 len: 1280
+ { skbaddr: ffff88005f3d4900 } hitcount: 1 len: 1280
+ { skbaddr: ffff88009fed6300 } hitcount: 1 len: 115
+ { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 115
+ { skbaddr: ffff88008cdb1900 } hitcount: 1 len: 46
+ { skbaddr: ffff880064b5ef00 } hitcount: 1 len: 118
+ { skbaddr: ffff880044e3c700 } hitcount: 1 len: 60
+ { skbaddr: ffff880100065900 } hitcount: 1 len: 46
+ { skbaddr: ffff8800d46bd500 } hitcount: 1 len: 116
+ { skbaddr: ffff88005f3d5f00 } hitcount: 1 len: 1280
+ { skbaddr: ffff880100064700 } hitcount: 1 len: 365
+ { skbaddr: ffff8800badb6f00 } hitcount: 1 len: 60
+ .
+ .
+ .
+ { skbaddr: ffff88009fe0be00 } hitcount: 27 len: 24677
+ { skbaddr: ffff88009fe0a400 } hitcount: 27 len: 23052
+ { skbaddr: ffff88009fe0b700 } hitcount: 31 len: 25589
+ { skbaddr: ffff88009fe0b600 } hitcount: 32 len: 27326
+ { skbaddr: ffff88006a462800 } hitcount: 68 len: 71678
+ { skbaddr: ffff88006a463700 } hitcount: 70 len: 72678
+ { skbaddr: ffff88006a462b00 } hitcount: 71 len: 77589
+ { skbaddr: ffff88006a463600 } hitcount: 73 len: 71307
+ { skbaddr: ffff88006a462200 } hitcount: 81 len: 81032
+
+ Totals:
+ Hits: 1451
+ Entries: 318
+ Dropped: 0
+
+
+ # event histogram
+ #
+ # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active]
+ #
+
+
+ Totals:
+ Hits: 0
+ Entries: 0
+ Dropped: 0
+
+
+ # event histogram
+ #
+ # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active]
+ #
+
+ { skbaddr: ffff88009fd2c300 } hitcount: 1 len: 7212
+ { skbaddr: ffff8800d2bcce00 } hitcount: 1 len: 7212
+ { skbaddr: ffff8800d2bcd700 } hitcount: 1 len: 7212
+ { skbaddr: ffff8800d2bcda00 } hitcount: 1 len: 21492
+ { skbaddr: ffff8800ae2e2d00 } hitcount: 1 len: 7212
+ { skbaddr: ffff8800d2bcdb00 } hitcount: 1 len: 7212
+ { skbaddr: ffff88006a4df500 } hitcount: 1 len: 4854
+ { skbaddr: ffff88008ce47b00 } hitcount: 1 len: 18636
+ { skbaddr: ffff8800ae2e2200 } hitcount: 1 len: 12924
+ { skbaddr: ffff88005f3e1000 } hitcount: 1 len: 4356
+ { skbaddr: ffff8800d2bcdc00 } hitcount: 2 len: 24420
+ { skbaddr: ffff8800d2bcc200 } hitcount: 2 len: 12996
+
+ Totals:
+ Hits: 14
+ Entries: 12
+ Dropped: 0
+
+
+ # event histogram
+ #
+ # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active]
+ #
+
+
+ Totals:
+ Hits: 0
+ Entries: 0
+ Dropped: 0
+
+ Named triggers can be used to have triggers share a common set of
+ histogram data. This capability is mostly useful for combining the
+ output of events generated by tracepoints contained inside inline
+ functions, but names can be used in a hist trigger on any event.
+ For example, these two triggers when hit will update the same 'len'
+ field in the shared 'foo' histogram data:
+
+ # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
+ /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+ # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
+ /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+ You can see that they're updating common histogram data by reading
+ each event's hist files at the same time:
+
+ # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist;
+ cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+ # event histogram
+ #
+ # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
+ #
+
+ { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46
+ { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76
+ { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46
+ { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468
+ { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46
+ { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52
+ { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168
+ { skbaddr: ffff88006a54f400 } hitcount: 1 len: 46
+ { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260
+ { skbaddr: ffff880064505000 } hitcount: 1 len: 46
+ { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32
+ { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46
+ { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44
+ { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168
+ { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40
+ { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40
+ { skbaddr: ffff880064505f00 } hitcount: 1 len: 174
+ { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160
+ { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76
+ { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46
+ { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32
+ { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46
+ { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988
+ { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46
+ { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44
+ { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676
+ { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107
+ { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92
+ { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142
+ { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220
+ { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92
+ { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92
+ { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675
+ { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138
+ { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138
+ { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184
+ { skbaddr: ffff880064504400 } hitcount: 4 len: 184
+ { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184
+ { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230
+ { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196
+ { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276
+ { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276
+
+ Totals:
+ Hits: 81
+ Entries: 42
+ Dropped: 0
+ # event histogram
+ #
+ # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
+ #
+
+ { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46
+ { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76
+ { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46
+ { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468
+ { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46
+ { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52
+ { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168
+ { skbaddr: ffff88006a54f400 } hitcount: 1 len: 46
+ { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260
+ { skbaddr: ffff880064505000 } hitcount: 1 len: 46
+ { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32
+ { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46
+ { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44
+ { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168
+ { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40
+ { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40
+ { skbaddr: ffff880064505f00 } hitcount: 1 len: 174
+ { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160
+ { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76
+ { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46
+ { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32
+ { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46
+ { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988
+ { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46
+ { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44
+ { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676
+ { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107
+ { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92
+ { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142
+ { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220
+ { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92
+ { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92
+ { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675
+ { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138
+ { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138
+ { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184
+ { skbaddr: ffff880064504400 } hitcount: 4 len: 184
+ { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184
+ { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230
+ { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196
+ { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276
+ { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276
+
+ Totals:
+ Hits: 81
+ Entries: 42
+ Dropped: 0
+
+ And here's an example that shows how to combine histogram data from
+ any two events even if they don't share any 'compatible' fields
+ other than 'hitcount' and 'stacktrace'. These commands create a
+ couple of triggers named 'bar' using those fields:
+
+ # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
+ /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+ # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
+ /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+ And displaying the output of either shows some interesting if
+ somewhat confusing output:
+
+ # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+ # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+ # event histogram
+ #
+ # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active]
+ #
+
+ { stacktrace:
+ _do_fork+0x18e/0x330
+ kernel_thread+0x29/0x30
+ kthreadd+0x154/0x1b0
+ ret_from_fork+0x3f/0x70
+ } hitcount: 1
+ { stacktrace:
+ netif_rx_internal+0xb2/0xd0
+ netif_rx_ni+0x20/0x70
+ dev_loopback_xmit+0xaa/0xd0
+ ip_mc_output+0x126/0x240
+ ip_local_out_sk+0x31/0x40
+ igmp_send_report+0x1e9/0x230
+ igmp_timer_expire+0xe9/0x120
+ call_timer_fn+0x39/0xf0
+ run_timer_softirq+0x1e1/0x290
+ __do_softirq+0xfd/0x290
+ irq_exit+0x98/0xb0
+ smp_apic_timer_interrupt+0x4a/0x60
+ apic_timer_interrupt+0x6d/0x80
+ cpuidle_enter+0x17/0x20
+ call_cpuidle+0x3b/0x60
+ cpu_startup_entry+0x22d/0x310
+ } hitcount: 1
+ { stacktrace:
+ netif_rx_internal+0xb2/0xd0
+ netif_rx_ni+0x20/0x70
+ dev_loopback_xmit+0xaa/0xd0
+ ip_mc_output+0x17f/0x240
+ ip_local_out_sk+0x31/0x40
+ ip_send_skb+0x1a/0x50
+ udp_send_skb+0x13e/0x270
+ udp_sendmsg+0x2bf/0x980
+ inet_sendmsg+0x67/0xa0
+ sock_sendmsg+0x38/0x50
+ SYSC_sendto+0xef/0x170
+ SyS_sendto+0xe/0x10
+ entry_SYSCALL_64_fastpath+0x12/0x6a
+ } hitcount: 2
+ { stacktrace:
+ netif_rx_internal+0xb2/0xd0
+ netif_rx+0x1c/0x60
+ loopback_xmit+0x6c/0xb0
+ dev_hard_start_xmit+0x219/0x3a0
+ __dev_queue_xmit+0x415/0x4f0
+ dev_queue_xmit_sk+0x13/0x20
+ ip_finish_output2+0x237/0x340
+ ip_finish_output+0x113/0x1d0
+ ip_output+0x66/0xc0
+ ip_local_out_sk+0x31/0x40
+ ip_send_skb+0x1a/0x50
+ udp_send_skb+0x16d/0x270
+ udp_sendmsg+0x2bf/0x980
+ inet_sendmsg+0x67/0xa0
+ sock_sendmsg+0x38/0x50
+ ___sys_sendmsg+0x14e/0x270
+ } hitcount: 76
+ { stacktrace:
+ netif_rx_internal+0xb2/0xd0
+ netif_rx+0x1c/0x60
+ loopback_xmit+0x6c/0xb0
+ dev_hard_start_xmit+0x219/0x3a0
+ __dev_queue_xmit+0x415/0x4f0
+ dev_queue_xmit_sk+0x13/0x20
+ ip_finish_output2+0x237/0x340
+ ip_finish_output+0x113/0x1d0
+ ip_output+0x66/0xc0
+ ip_local_out_sk+0x31/0x40
+ ip_send_skb+0x1a/0x50
+ udp_send_skb+0x16d/0x270
+ udp_sendmsg+0x2bf/0x980
+ inet_sendmsg+0x67/0xa0
+ sock_sendmsg+0x38/0x50
+ ___sys_sendmsg+0x269/0x270
+ } hitcount: 77
+ { stacktrace:
+ netif_rx_internal+0xb2/0xd0
+ netif_rx+0x1c/0x60
+ loopback_xmit+0x6c/0xb0
+ dev_hard_start_xmit+0x219/0x3a0
+ __dev_queue_xmit+0x415/0x4f0
+ dev_queue_xmit_sk+0x13/0x20
+ ip_finish_output2+0x237/0x340
+ ip_finish_output+0x113/0x1d0
+ ip_output+0x66/0xc0
+ ip_local_out_sk+0x31/0x40
+ ip_send_skb+0x1a/0x50
+ udp_send_skb+0x16d/0x270
+ udp_sendmsg+0x2bf/0x980
+ inet_sendmsg+0x67/0xa0
+ sock_sendmsg+0x38/0x50
+ SYSC_sendto+0xef/0x170
+ } hitcount: 88
+ { stacktrace:
+ _do_fork+0x18e/0x330
+ SyS_clone+0x19/0x20
+ entry_SYSCALL_64_fastpath+0x12/0x6a
+ } hitcount: 244
+
+ Totals:
+ Hits: 489
+ Entries: 7
+ Dropped: 0
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index f52f297cb406..a6b3705e62a6 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -210,6 +210,11 @@ of ftrace. Here is a list of some of the key files:
Note, sched_switch and sched_wake_up will also trace events
listed in this file.
+ To have the PIDs of children of tasks with their PID in this file
+ added on fork, enable the "event-fork" option. That option will also
+ cause the PIDs of tasks to be removed from this file when the task
+ exits.
+
set_graph_function:
Set a "trigger" function where tracing should start
@@ -725,16 +730,14 @@ noraw
nohex
nobin
noblock
-nostacktrace
trace_printk
-noftrace_preempt
nobranch
annotate
nouserstacktrace
nosym-userobj
noprintk-msg-only
context-info
-latency-format
+nolatency-format
sleep-time
graph-time
record-cmd
@@ -742,7 +745,10 @@ overwrite
nodisable_on_free
irq-info
markers
+noevent-fork
function-trace
+nodisplay-graph
+nostacktrace
To disable one of the options, echo in the option prepended with
"no".
@@ -796,11 +802,6 @@ Here are the available options:
block - When set, reading trace_pipe will not block when polled.
- stacktrace - This is one of the options that changes the trace
- itself. When a trace is recorded, so is the stack
- of functions. This allows for back traces of
- trace sites.
-
trace_printk - Can disable trace_printk() from writing into the buffer.
branch - Enable branch tracing with the tracer.
@@ -897,6 +898,10 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
When disabled, the trace_marker will error with EINVAL
on write.
+ event-fork - When set, tasks with PIDs listed in set_event_pid will have
+ the PIDs of their children added to set_event_pid when those
+ tasks fork. Also, when tasks with PIDs in set_event_pid exit,
+ their PIDs will be removed from the file.
function-trace - The latency tracers will enable function tracing
if this option is enabled (default it is). When
@@ -904,8 +909,17 @@ x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
functions. This keeps the overhead of the tracer down
when performing latency tests.
- Note: Some tracers have their own options. They only appear
- when the tracer is active.
+ display-graph - When set, the latency tracers (irqsoff, wakeup, etc) will
+ use function graph tracing instead of function tracing.
+
+ stacktrace - This is one of the options that changes the trace
+ itself. When a trace is recorded, so is the stack
+ of functions. This allows for back traces of
+ trace sites.
+
+ Note: Some tracers have their own options. They only appear in this
+ file when the tracer is active. They always appear in the
+ options directory.
@@ -1562,12 +1576,12 @@ Doing the same with chrt -r 5 and function-trace set.
<idle>-0 3dN.1 12us : menu_hrtimer_cancel <-tick_nohz_idle_exit
<idle>-0 3dN.1 12us : ktime_get <-tick_nohz_idle_exit
<idle>-0 3dN.1 12us : tick_do_update_jiffies64 <-tick_nohz_idle_exit
- <idle>-0 3dN.1 13us : update_cpu_load_nohz <-tick_nohz_idle_exit
- <idle>-0 3dN.1 13us : _raw_spin_lock <-update_cpu_load_nohz
+ <idle>-0 3dN.1 13us : cpu_load_update_nohz <-tick_nohz_idle_exit
+ <idle>-0 3dN.1 13us : _raw_spin_lock <-cpu_load_update_nohz
<idle>-0 3dN.1 13us : add_preempt_count <-_raw_spin_lock
- <idle>-0 3dN.2 13us : __update_cpu_load <-update_cpu_load_nohz
- <idle>-0 3dN.2 14us : sched_avg_update <-__update_cpu_load
- <idle>-0 3dN.2 14us : _raw_spin_unlock <-update_cpu_load_nohz
+ <idle>-0 3dN.2 13us : __cpu_load_update <-cpu_load_update_nohz
+ <idle>-0 3dN.2 14us : sched_avg_update <-__cpu_load_update
+ <idle>-0 3dN.2 14us : _raw_spin_unlock <-cpu_load_update_nohz
<idle>-0 3dN.2 14us : sub_preempt_count <-_raw_spin_unlock
<idle>-0 3dN.1 15us : calc_load_exit_idle <-tick_nohz_idle_exit
<idle>-0 3dN.1 15us : touch_softlockup_watchdog <-tick_nohz_idle_exit
diff --git a/Documentation/video4linux/CARDLIST.cx23885 b/Documentation/video4linux/CARDLIST.cx23885
index 44a4cfbfdc40..85a8fdcfcdaa 100644
--- a/Documentation/video4linux/CARDLIST.cx23885
+++ b/Documentation/video4linux/CARDLIST.cx23885
@@ -52,3 +52,5 @@
51 -> DVBSky T982 [4254:0982]
52 -> Hauppauge WinTV-HVR5525 [0070:f038]
53 -> Hauppauge WinTV Starburst [0070:c12a]
+ 54 -> ViewCast 260e [1576:0260]
+ 55 -> ViewCast 460e [1576:0460]
diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx
index 67209998a439..6784220c6a16 100644
--- a/Documentation/video4linux/CARDLIST.em28xx
+++ b/Documentation/video4linux/CARDLIST.em28xx
@@ -76,9 +76,9 @@
75 -> Dikom DK300 (em2882)
76 -> KWorld PlusTV 340U or UB435-Q (ATSC) (em2870) [1b80:a340]
77 -> EM2874 Leadership ISDBT (em2874)
- 78 -> PCTV nanoStick T2 290e (em28174)
+ 78 -> PCTV nanoStick T2 290e (em28174) [2013:024f]
79 -> Terratec Cinergy H5 (em2884) [eb1a:2885,0ccd:10a2,0ccd:10ad,0ccd:10b6]
- 80 -> PCTV DVB-S2 Stick (460e) (em28174)
+ 80 -> PCTV DVB-S2 Stick (460e) (em28174) [2013:024c]
81 -> Hauppauge WinTV HVR 930C (em2884) [2040:1605]
82 -> Terratec Cinergy HTC Stick (em2884) [0ccd:00b2]
83 -> Honestech Vidbox NW03 (em2860) [eb1a:5006]
@@ -90,9 +90,11 @@
89 -> Delock 61959 (em2874) [1b80:e1cc]
90 -> KWorld USB ATSC TV Stick UB435-Q V2 (em2874) [1b80:e346]
91 -> SpeedLink Vicious And Devine Laplace webcam (em2765) [1ae7:9003,1ae7:9004]
- 92 -> PCTV DVB-S2 Stick (461e) (em28178)
+ 92 -> PCTV DVB-S2 Stick (461e) (em28178) [2013:0258]
93 -> KWorld USB ATSC TV Stick UB435-Q V3 (em2874) [1b80:e34c]
- 94 -> PCTV tripleStick (292e) (em28178)
+ 94 -> PCTV tripleStick (292e) (em28178) [2013:025f,2040:0264]
95 -> Leadtek VC100 (em2861) [0413:6f07]
- 96 -> Terratec Cinergy T2 Stick HD (em28178)
+ 96 -> Terratec Cinergy T2 Stick HD (em28178) [eb1a:8179]
97 -> Elgato EyeTV Hybrid 2008 INT (em2884) [0fd9:0018]
+ 98 -> PLEX PX-BCUD (em28178) [3275:0085]
+ 99 -> Hauppauge WinTV-dualHD DVB (em28174) [2040:0265]
diff --git a/Documentation/video4linux/vivid.txt b/Documentation/video4linux/vivid.txt
index e35d376b7f64..8da5d2a576bc 100644
--- a/Documentation/video4linux/vivid.txt
+++ b/Documentation/video4linux/vivid.txt
@@ -294,7 +294,7 @@ the result will be.
These inputs support all combinations of the field setting. Special care has
been taken to faithfully reproduce how fields are handled for the different
-TV standards. This is particularly noticable when generating a horizontally
+TV standards. This is particularly noticeable when generating a horizontally
moving image so the temporal effect of using interlaced formats becomes clearly
visible. For 50 Hz standards the top field is the oldest and the bottom field
is the newest in time. For 60 Hz standards that is reversed: the bottom field
@@ -313,7 +313,7 @@ will be SMPTE-170M.
The pixel aspect ratio will depend on the TV standard. The video aspect ratio
can be selected through the 'Standard Aspect Ratio' Vivid control.
Choices are '4x3', '16x9' which will give letterboxed widescreen video and
-'16x9 Anomorphic' which will give full screen squashed anamorphic widescreen
+'16x9 Anamorphic' which will give full screen squashed anamorphic widescreen
video that will need to be scaled accordingly.
The TV 'tuner' supports a frequency range of 44-958 MHz. Channels are available
@@ -862,7 +862,7 @@ RDS Radio Text:
RDS Stereo:
RDS Artificial Head:
RDS Compressed:
-RDS Dymanic PTY:
+RDS Dynamic PTY:
RDS Traffic Announcement:
RDS Traffic Program:
RDS Music: these are all controls that set the RDS data that is transmitted by
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4d0542c5206b..a4482cce4bae 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -199,8 +199,8 @@ Type: vm ioctl
Parameters: vcpu id (apic id on x86)
Returns: vcpu fd on success, -1 on error
-This API adds a vcpu to a virtual machine. The vcpu id is a small integer
-in the range [0, max_vcpus).
+This API adds a vcpu to a virtual machine. No more than max_vcpus may be added.
+The vcpu id is an integer in the range [0, max_vcpu_id).
The recommended max_vcpus value can be retrieved using the KVM_CAP_NR_VCPUS of
the KVM_CHECK_EXTENSION ioctl() at run-time.
@@ -212,6 +212,12 @@ cpus max.
If the KVM_CAP_MAX_VCPUS does not exist, you should assume that max_vcpus is
same as the value returned from KVM_CAP_NR_VCPUS.
+The maximum possible value for max_vcpu_id can be retrieved using the
+KVM_CAP_MAX_VCPU_ID of the KVM_CHECK_EXTENSION ioctl() at run-time.
+
+If the KVM_CAP_MAX_VCPU_ID does not exist, you should assume that max_vcpu_id
+is the same as the value returned from KVM_CAP_MAX_VCPUS.
+
On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
threads in one or more virtual CPU cores. (This is because the
hardware requires all the hardware threads in a CPU core to be in the
@@ -3788,6 +3794,14 @@ a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
Fails if VCPU has already been created, or if the irqchip is already in the
kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
+7.6 KVM_CAP_S390_RI
+
+Architectures: s390
+Parameters: none
+
+Allows use of runtime-instrumentation introduced with zEC12 processor.
+Will return -EINVAL if the machine does not support runtime-instrumentation.
+Will return -EBUSY if a VCPU has already been created.
8. Other capabilities.
----------------------
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index e3e314cb83e8..6b0e115301c8 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -11,6 +11,7 @@ FLIC provides support to
- add interrupts (KVM_DEV_FLIC_ENQUEUE)
- inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS)
- purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
+- purge one pending floating I/O interrupt (KVM_DEV_FLIC_CLEAR_IO_IRQ)
- enable/disable for the guest transparent async page faults
- register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*)
@@ -40,6 +41,11 @@ Groups:
Simply deletes all elements from the list of currently pending floating
interrupts. No interrupts are injected into the guest.
+ KVM_DEV_FLIC_CLEAR_IO_IRQ
+ Deletes one (if any) I/O interrupt for a subchannel identified by the
+ subsystem identification word passed via the buffer specified by
+ attr->addr (address) and attr->attr (length).
+
KVM_DEV_FLIC_APF_ENABLE
Enables async page faults for the guest. So in case of a major page fault
the host is allowed to handle this async and continues the guest.
@@ -68,7 +74,7 @@ struct kvm_s390_io_adapter {
KVM_DEV_FLIC_ADAPTER_MODIFY
Modifies attributes of an existing I/O adapter interrupt source. Takes
- a kvm_s390_io_adapter_req specifiying the adapter and the operation:
+ a kvm_s390_io_adapter_req specifying the adapter and the operation:
struct kvm_s390_io_adapter_req {
__u32 id;
@@ -94,3 +100,9 @@ struct kvm_s390_io_adapter_req {
KVM_S390_IO_ADAPTER_UNMAP
release a userspace page for the translated address specified in addr
from the list of mappings
+
+Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on
+FLIC with an unknown group or attribute give the error code EINVAL (instead of
+ENXIO, as specified in the API documentation). It is not possible to conclude
+that a FLIC operation is unavailable based on the error code resulting from a
+usage attempt.
diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
index 54944c71b819..2a4ee6302122 100644
--- a/Documentation/x86/pat.txt
+++ b/Documentation/x86/pat.txt
@@ -196,3 +196,35 @@ Another, more verbose way of getting PAT related debug messages is with
"debugpat" boot parameter. With this parameter, various debug messages are
printed to dmesg log.
+PAT Initialization
+------------------
+
+The following table describes how PAT is initialized under various
+configurations. The PAT MSR must be updated by Linux in order to support WC
+and WT attributes. Otherwise, the PAT MSR has the value programmed in it
+by the firmware. Note, Xen enables WC attribute in the PAT MSR for guests.
+
+ MTRR PAT Call Sequence PAT State PAT MSR
+ =========================================================
+ E E MTRR -> PAT init Enabled OS
+ E D MTRR -> PAT init Disabled -
+ D E MTRR -> PAT disable Disabled BIOS
+ D D MTRR -> PAT disable Disabled -
+ - np/E PAT -> PAT disable Disabled BIOS
+ - np/D PAT -> PAT disable Disabled -
+ E !P/E MTRR -> PAT init Disabled BIOS
+ D !P/E MTRR -> PAT disable Disabled BIOS
+ !M !P/E MTRR stub -> PAT disable Disabled BIOS
+
+ Legend
+ ------------------------------------------------
+ E Feature enabled in CPU
+ D Feature disabled/unsupported in CPU
+ np "nopat" boot option specified
+ !P CONFIG_X86_PAT option unset
+ !M CONFIG_MTRR option unset
+ Enabled PAT state set to enabled
+ Disabled PAT state set to disabled
+ OS PAT initializes PAT MSR with OS setting
+ BIOS PAT keeps PAT MSR with BIOS setting
+
diff --git a/Documentation/x86/protection-keys.txt b/Documentation/x86/protection-keys.txt
new file mode 100644
index 000000000000..c281ded1ba16
--- /dev/null
+++ b/Documentation/x86/protection-keys.txt
@@ -0,0 +1,27 @@
+Memory Protection Keys for Userspace (PKU aka PKEYs) is a CPU feature
+which will be found on future Intel CPUs.
+
+Memory Protection Keys provides a mechanism for enforcing page-based
+protections, but without requiring modification of the page tables
+when an application changes protection domains. It works by
+dedicating 4 previously ignored bits in each page table entry to a
+"protection key", giving 16 possible keys.
+
+There is also a new user-accessible register (PKRU) with two separate
+bits (Access Disable and Write Disable) for each key. Being a CPU
+register, PKRU is inherently thread-local, potentially giving each
+thread a different set of protections from every other thread.
+
+There are two new instructions (RDPKRU/WRPKRU) for reading and writing
+to the new register. The feature is only available in 64-bit mode,
+even though there is theoretically space in the PAE PTEs. These
+permissions are enforced on data access only and have no effect on
+instruction fetches.
+
+=========================== Config Option ===========================
+
+This config option adds approximately 1.5kb of text and 50 bytes of
+data to the executable. A workload which does large O_DIRECT reads
+of holes in XFS files was run to exercise get_user_pages_fast(). No
+performance delta was observed with the config option
+enabled or disabled.
diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt
new file mode 100644
index 000000000000..06afac252f5b
--- /dev/null
+++ b/Documentation/x86/topology.txt
@@ -0,0 +1,208 @@
+x86 Topology
+============
+
+This documents and clarifies the main aspects of x86 topology modelling and
+representation in the kernel. Update/change when doing changes to the
+respective code.
+
+The architecture-agnostic topology definitions are in
+Documentation/cputopology.txt. This file holds x86-specific
+differences/specialities which do not necessarily apply to the generic
+definitions. Thus, the way to read up on Linux topology on x86 is to start
+with the generic one and look at this one in parallel for the x86 specifics.
+
+Needless to say, code should use the generic functions - this file is *only*
+here to *document* the inner workings of x86 topology.
+
+Started by Thomas Gleixner <tglx@linutronix.de> and Borislav Petkov <bp@alien8.de>.
+
+The main aim of the topology facilities is to present adequate interfaces to
+code which needs to know/query/use the structure of the running system wrt
+threads, cores, packages, etc.
+
+The kernel does not care about the concept of physical sockets because a
+socket has no relevance to software. It's an electromechanical component. In
+the past a socket always contained a single package (see below), but with the
+advent of Multi Chip Modules (MCM) a socket can hold more than one package. So
+there might still be references to sockets in the code, but they are of
+historical nature and should be cleaned up.
+
+The topology of a system is described in the units of:
+
+ - packages
+ - cores
+ - threads
+
+* Package:
+
+ Packages contain a number of cores plus shared resources, e.g. DRAM
+ controller, shared caches etc.
+
+ AMD nomenclature for package is 'Node'.
+
+ Package-related topology information in the kernel:
+
+ - cpuinfo_x86.x86_max_cores:
+
+ The number of cores in a package. This information is retrieved via CPUID.
+
+ - cpuinfo_x86.phys_proc_id:
+
+ The physical ID of the package. This information is retrieved via CPUID
+ and deduced from the APIC IDs of the cores in the package.
+
+ - cpuinfo_x86.logical_id:
+
+ The logical ID of the package. As we do not trust BIOSes to enumerate the
+ packages in a consistent way, we introduced the concept of logical package
+ ID so we can sanely calculate the number of maximum possible packages in
+ the system and have the packages enumerated linearly.
+
+ - topology_max_packages():
+
+ The maximum possible number of packages in the system. Helpful for per
+ package facilities to preallocate per package information.
+
+
+* Cores:
+
+ A core consists of 1 or more threads. It does not matter whether the threads
+ are SMT- or CMT-type threads.
+
+ AMD's nomenclature for a CMT core is "Compute Unit". The kernel always uses
+ "core".
+
+ Core-related topology information in the kernel:
+
+ - smp_num_siblings:
+
+ The number of threads in a core. The number of threads in a package can be
+ calculated by:
+
+ threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings
+
+
+* Threads:
+
+ A thread is a single scheduling unit. It's the equivalent of a logical Linux
+ CPU.
+
+ AMD's nomenclature for CMT threads is "Compute Unit Core". The kernel always
+ uses "thread".
+
+ Thread-related topology information in the kernel:
+
+ - topology_core_cpumask():
+
+ The cpumask contains all online threads in the package to which a thread
+ belongs.
+
+ The number of online threads is also printed in /proc/cpuinfo "siblings."
+
+ - topology_sibling_mask():
+
+ The cpumask contains all online threads in the core to which a thread
+ belongs.
+
+ - topology_logical_package_id():
+
+ The logical package ID to which a thread belongs.
+
+ - topology_physical_package_id():
+
+ The physical package ID to which a thread belongs.
+
+ - topology_core_id():
+
+ The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo
+ "core_id."
+
+
+
+System topology examples
+
+Note:
+
+The alternative Linux CPU enumeration depends on how the BIOS enumerates the
+threads. Many BIOSes enumerate all threads 0 first and then all threads 1.
+That has the "advantage" that the logical Linux CPU numbers of threads 0 stay
+the same whether threads are enabled or not. That's merely an implementation
+detail and has no practical impact.
+
+1) Single Package, Single Core
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+
+2) Single Package, Dual Core
+
+ a) One thread per core
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+ -> [core 1] -> [thread 0] -> Linux CPU 1
+
+ b) Two threads per core
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+ -> [thread 1] -> Linux CPU 1
+ -> [core 1] -> [thread 0] -> Linux CPU 2
+ -> [thread 1] -> Linux CPU 3
+
+ Alternative enumeration:
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+ -> [thread 1] -> Linux CPU 2
+ -> [core 1] -> [thread 0] -> Linux CPU 1
+ -> [thread 1] -> Linux CPU 3
+
+ AMD nomenclature for CMT systems:
+
+ [node 0] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 0
+ -> [Compute Unit Core 1] -> Linux CPU 1
+ -> [Compute Unit 1] -> [Compute Unit Core 0] -> Linux CPU 2
+ -> [Compute Unit Core 1] -> Linux CPU 3
+
+3) Dual Package, Dual Core
+
+ a) One thread per core
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+ -> [core 1] -> [thread 0] -> Linux CPU 1
+
+ [package 1] -> [core 0] -> [thread 0] -> Linux CPU 2
+ -> [core 1] -> [thread 0] -> Linux CPU 3
+
+ b) Two threads per core
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+ -> [thread 1] -> Linux CPU 1
+ -> [core 1] -> [thread 0] -> Linux CPU 2
+ -> [thread 1] -> Linux CPU 3
+
+ [package 1] -> [core 0] -> [thread 0] -> Linux CPU 4
+ -> [thread 1] -> Linux CPU 5
+ -> [core 1] -> [thread 0] -> Linux CPU 6
+ -> [thread 1] -> Linux CPU 7
+
+ Alternative enumeration:
+
+ [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0
+ -> [thread 1] -> Linux CPU 4
+ -> [core 1] -> [thread 0] -> Linux CPU 1
+ -> [thread 1] -> Linux CPU 5
+
+ [package 1] -> [core 0] -> [thread 0] -> Linux CPU 2
+ -> [thread 1] -> Linux CPU 6
+ -> [core 1] -> [thread 0] -> Linux CPU 3
+ -> [thread 1] -> Linux CPU 7
+
+ AMD nomenclature for CMT systems:
+
+ [node 0] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 0
+ -> [Compute Unit Core 1] -> Linux CPU 1
+ -> [Compute Unit 1] -> [Compute Unit Core 0] -> Linux CPU 2
+ -> [Compute Unit Core 1] -> Linux CPU 3
+
+ [node 1] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 4
+ -> [Compute Unit Core 1] -> Linux CPU 5
+ -> [Compute Unit 1] -> [Compute Unit Core 0] -> Linux CPU 6
+ -> [Compute Unit Core 1] -> Linux CPU 7
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index c518dce7da4d..5aa738346062 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -19,7 +19,7 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
ffffffef00000000 - ffffffff00000000 (=64 GB) EFI region mapping space
... unused hole ...
ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space
+ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
@@ -31,8 +31,8 @@ vmalloc space is lazily synchronized into the different PML4 pages of
the processes using the page fault handler, with init_level4_pgt as
reference.
-Current X86-64 implementations only support 40 bits of address space,
-but we support up to 46 bits. This expands into MBZ space in the page tables.
+Current X86-64 implementations support up to 46 bits of address space (64 TB),
+which is our current limit. This expands into MBZ space in the page tables.
We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
memory window (this size is arbitrary, it can be raised later if needed).