summaryrefslogtreecommitdiff
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/testing/sysfs-hypervisor-pmu23
-rw-r--r--Documentation/Changes17
-rw-r--r--Documentation/DocBook/device-drivers.tmpl34
-rw-r--r--Documentation/DocBook/media/Makefile3
-rw-r--r--Documentation/DocBook/media/dvb/intro.xml5
-rw-r--r--Documentation/DocBook/media/v4l/controls.xml2
-rw-r--r--Documentation/DocBook/media/v4l/media-ioc-device-info.xml2
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-expbuf.xml38
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-parm.xml2
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-queryctrl.xml2
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adv7604.txt21
-rw-r--r--Documentation/devicetree/bindings/media/i2c/tc358743.txt48
-rw-r--r--Documentation/devicetree/bindings/media/renesas,jpu.txt24
-rw-r--r--Documentation/devicetree/bindings/media/stih407-c8sectpfe.txt89
-rw-r--r--Documentation/features/vm/TLB/arch-support.txt40
-rw-r--r--Documentation/filesystems/Locking3
-rw-r--r--Documentation/filesystems/nfs/nfs-rdma.txt16
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--Documentation/kbuild/kbuild.txt5
-rw-r--r--Documentation/kernel-parameters.txt16
-rw-r--r--Documentation/md-cluster.txt4
-rw-r--r--Documentation/module-signing.txt56
-rw-r--r--Documentation/security/Smack.txt27
-rw-r--r--Documentation/security/Yama.txt10
-rw-r--r--Documentation/trace/ftrace.txt51
-rw-r--r--Documentation/vm/userfaultfd.txt144
26 files changed, 603 insertions, 80 deletions
diff --git a/Documentation/ABI/testing/sysfs-hypervisor-pmu b/Documentation/ABI/testing/sysfs-hypervisor-pmu
new file mode 100644
index 000000000000..224faa105e18
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-hypervisor-pmu
@@ -0,0 +1,23 @@
+What: /sys/hypervisor/pmu/pmu_mode
+Date: August 2015
+KernelVersion: 4.3
+Contact: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Description:
+ Describes mode that Xen's performance-monitoring unit (PMU)
+ uses. Accepted values are
+ "off" -- PMU is disabled
+ "self" -- The guest can profile itself
+ "hv" -- The guest can profile itself and, if it is
+ privileged (e.g. dom0), the hypervisor
+ "all" -- The guest can profile itself, the hypervisor
+ and all other guests. Only available to
+ privileged guests.
+
+What: /sys/hypervisor/pmu/pmu_features
+Date: August 2015
+KernelVersion: 4.3
+Contact: Boris Ostrovsky <boris.ostrovsky@oracle.com>
+Description:
+ Describes Xen PMU features (as an integer). A set bit indicates
+ that the corresponding feature is enabled. See
+ include/xen/interface/xenpmu.h for available features
diff --git a/Documentation/Changes b/Documentation/Changes
index 646cdaa6e9d1..6d8863004858 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -43,6 +43,7 @@ o udev 081 # udevd --version
o grub 0.93 # grub --version || grub-install --version
o mcelog 0.6 # mcelog --version
o iptables 1.4.2 # iptables -V
+o openssl & libcrypto 1.0.1k # openssl version
Kernel compilation
@@ -79,6 +80,17 @@ BC
You will need bc to build kernels 3.10 and higher
+OpenSSL
+-------
+
+Module signing and external certificate handling use the OpenSSL program and
+crypto library to do key creation and signature generation.
+
+You will need openssl to build kernels 3.7 and higher if module signing is
+enabled. You will also need openssl development packages to build kernels 4.3
+and higher.
+
+
System utilities
================
@@ -295,6 +307,10 @@ Binutils
--------
o <ftp://ftp.kernel.org/pub/linux/devel/binutils/>
+OpenSSL
+-------
+o <https://www.openssl.org/>
+
System utilities
****************
@@ -392,4 +408,3 @@ o <http://oprofile.sf.net/download/>
NFS-Utils
---------
o <http://nfs.sourceforge.net/>
-
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl
index bbc1d7ee9c76..abba93f9d64a 100644
--- a/Documentation/DocBook/device-drivers.tmpl
+++ b/Documentation/DocBook/device-drivers.tmpl
@@ -217,6 +217,40 @@ X!Isound/sound_firmware.c
-->
</chapter>
+ <chapter id="mediadev">
+ <title>Media Devices</title>
+
+ <sect1><title>Video2Linux devices</title>
+!Iinclude/media/v4l2-async.h
+!Iinclude/media/v4l2-ctrls.h
+!Iinclude/media/v4l2-dv-timings.h
+!Iinclude/media/v4l2-event.h
+!Iinclude/media/v4l2-flash-led-class.h
+!Iinclude/media/v4l2-mediabus.h
+!Iinclude/media/v4l2-mem2mem.h
+!Iinclude/media/v4l2-of.h
+!Iinclude/media/v4l2-subdev.h
+!Iinclude/media/videobuf2-core.h
+!Iinclude/media/videobuf2-memops.h
+ </sect1>
+ <sect1><title>Digital TV (DVB) devices</title>
+!Idrivers/media/dvb-core/dvb_ca_en50221.h
+!Idrivers/media/dvb-core/dvb_frontend.h
+!Idrivers/media/dvb-core/dvb_math.h
+!Idrivers/media/dvb-core/dvb_ringbuffer.h
+!Idrivers/media/dvb-core/dvbdev.h
+ </sect1>
+ <sect1><title>Remote Controller devices</title>
+!Iinclude/media/rc-core.h
+ </sect1>
+ <sect1><title>Media Controller devices</title>
+!Iinclude/media/media-device.h
+!Iinclude/media/media-devnode.h
+!Iinclude/media/media-entity.h
+ </sect1>
+
+ </chapter>
+
<chapter id="uart16x50">
<title>16x50 UART Driver</title>
!Edrivers/tty/serial/serial_core.c
diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile
index 23996f88cd58..08527e7ea4d0 100644
--- a/Documentation/DocBook/media/Makefile
+++ b/Documentation/DocBook/media/Makefile
@@ -199,7 +199,8 @@ DVB_DOCUMENTED = \
#
install_media_images = \
- $(Q)-cp $(OBJIMGFILES) $(MEDIA_SRC_DIR)/*.svg $(MEDIA_SRC_DIR)/v4l/*.svg $(MEDIA_OBJ_DIR)/media_api
+ $(Q)-mkdir $(MEDIA_OBJ_DIR)/media_api; \
+ cp $(OBJIMGFILES) $(MEDIA_SRC_DIR)/*.svg $(MEDIA_SRC_DIR)/v4l/*.svg $(MEDIA_OBJ_DIR)/media_api
$(MEDIA_OBJ_DIR)/%: $(MEDIA_SRC_DIR)/%.b64
$(Q)base64 -d $< >$@
diff --git a/Documentation/DocBook/media/dvb/intro.xml b/Documentation/DocBook/media/dvb/intro.xml
index bcc72c216402..51db15648099 100644
--- a/Documentation/DocBook/media/dvb/intro.xml
+++ b/Documentation/DocBook/media/dvb/intro.xml
@@ -163,9 +163,8 @@ are called:</para>
<para>where N enumerates the DVB PCI cards in a system starting
from&#x00A0;0, and M enumerates the devices of each type within each
adapter, starting from&#x00A0;0, too. We will omit the &#8220;
-<constant>/dev/dvb/adapterN/</constant>&#8221; in the further dicussion
-of these devices. The naming scheme for the devices is the same wheter
-devfs is used or not.</para>
+<constant>/dev/dvb/adapterN/</constant>&#8221; in the further discussion
+of these devices.</para>
<para>More details about the data structures and function calls of all
the devices are described in the following chapters.</para>
diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
index 6e1667b5f3eb..33aece541880 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -3414,7 +3414,7 @@ giving priority to the center of the metered area.</entry>
<row>
<entry><constant>V4L2_EXPOSURE_METERING_MATRIX</constant>&nbsp;</entry>
<entry>A multi-zone metering. The light intensity is measured
-in several points of the frame and the the results are combined. The
+in several points of the frame and the results are combined. The
algorithm of the zones selection and their significance in calculating the
final value is device dependent.</entry>
</row>
diff --git a/Documentation/DocBook/media/v4l/media-ioc-device-info.xml b/Documentation/DocBook/media/v4l/media-ioc-device-info.xml
index 2ce521419e67..b0a21ac300b8 100644
--- a/Documentation/DocBook/media/v4l/media-ioc-device-info.xml
+++ b/Documentation/DocBook/media/v4l/media-ioc-device-info.xml
@@ -102,7 +102,7 @@
</row>
<row>
<entry>__u32</entry>
- <entry><structfield>media_version</structfield></entry>
+ <entry><structfield>driver_version</structfield></entry>
<entry>Media device driver version, formatted with the
<constant>KERNEL_VERSION()</constant> macro. Together with the
<structfield>driver</structfield> field this identifies a particular
diff --git a/Documentation/DocBook/media/v4l/vidioc-expbuf.xml b/Documentation/DocBook/media/v4l/vidioc-expbuf.xml
index a78c9207422f..0ae0b6a915d0 100644
--- a/Documentation/DocBook/media/v4l/vidioc-expbuf.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-expbuf.xml
@@ -62,28 +62,28 @@ buffer as a DMABUF file at any time after buffers have been allocated with the
&VIDIOC-REQBUFS; ioctl.</para>
<para> To export a buffer, applications fill &v4l2-exportbuffer;. The
-<structfield> type </structfield> field is set to the same buffer type as was
-previously used with &v4l2-requestbuffers;<structfield> type </structfield>.
-Applications must also set the <structfield> index </structfield> field. Valid
+<structfield>type</structfield> field is set to the same buffer type as was
+previously used with &v4l2-requestbuffers; <structfield>type</structfield>.
+Applications must also set the <structfield>index</structfield> field. Valid
index numbers range from zero to the number of buffers allocated with
-&VIDIOC-REQBUFS; (&v4l2-requestbuffers;<structfield> count </structfield>)
-minus one. For the multi-planar API, applications set the <structfield> plane
-</structfield> field to the index of the plane to be exported. Valid planes
+&VIDIOC-REQBUFS; (&v4l2-requestbuffers; <structfield>count</structfield>)
+minus one. For the multi-planar API, applications set the <structfield>plane</structfield>
+field to the index of the plane to be exported. Valid planes
range from zero to the maximal number of valid planes for the currently active
-format. For the single-planar API, applications must set <structfield> plane
-</structfield> to zero. Additional flags may be posted in the <structfield>
-flags </structfield> field. Refer to a manual for open() for details.
+format. For the single-planar API, applications must set <structfield>plane</structfield>
+to zero. Additional flags may be posted in the <structfield>flags</structfield>
+field. Refer to a manual for open() for details.
Currently only O_CLOEXEC, O_RDONLY, O_WRONLY, and O_RDWR are supported. All
other fields must be set to zero.
In the case of multi-planar API, every plane is exported separately using
-multiple <constant> VIDIOC_EXPBUF </constant> calls. </para>
+multiple <constant>VIDIOC_EXPBUF</constant> calls.</para>
-<para> After calling <constant>VIDIOC_EXPBUF</constant> the <structfield> fd
-</structfield> field will be set by a driver. This is a DMABUF file
+<para>After calling <constant>VIDIOC_EXPBUF</constant> the <structfield>fd</structfield>
+field will be set by a driver. This is a DMABUF file
descriptor. The application may pass it to other DMABUF-aware devices. Refer to
<link linkend="dmabuf">DMABUF importing</link> for details about importing
DMABUF files into V4L2 nodes. It is recommended to close a DMABUF file when it
-is no longer used to allow the associated memory to be reclaimed. </para>
+is no longer used to allow the associated memory to be reclaimed.</para>
</refsect1>
<refsect1>
@@ -170,9 +170,9 @@ multi-planar API. Otherwise this value must be set to zero. </entry>
<row>
<entry>__u32</entry>
<entry><structfield>flags</structfield></entry>
- <entry>Flags for the newly created file, currently only <constant>
-O_CLOEXEC </constant>, <constant>O_RDONLY</constant>, <constant>O_WRONLY
-</constant>, and <constant>O_RDWR</constant> are supported, refer to the manual
+ <entry>Flags for the newly created file, currently only
+<constant>O_CLOEXEC</constant>, <constant>O_RDONLY</constant>, <constant>O_WRONLY</constant>,
+and <constant>O_RDWR</constant> are supported, refer to the manual
of open() for more details.</entry>
</row>
<row>
@@ -200,9 +200,9 @@ set the array to zero.</entry>
<term><errorcode>EINVAL</errorcode></term>
<listitem>
<para>A queue is not in MMAP mode or DMABUF exporting is not
-supported or <structfield> flags </structfield> or <structfield> type
-</structfield> or <structfield> index </structfield> or <structfield> plane
-</structfield> fields are invalid.</para>
+supported or <structfield>flags</structfield> or <structfield>type</structfield>
+or <structfield>index</structfield> or <structfield>plane</structfield> fields
+are invalid.</para>
</listitem>
</varlistentry>
</variablelist>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-parm.xml b/Documentation/DocBook/media/v4l/vidioc-g-parm.xml
index f4e28e7d4751..721728745407 100644
--- a/Documentation/DocBook/media/v4l/vidioc-g-parm.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-g-parm.xml
@@ -267,7 +267,7 @@ is intended for still imaging applications. The idea is to get the
best possible image quality that the hardware can deliver. It is not
defined how the driver writer may achieve that; it will depend on the
hardware and the ingenuity of the driver writer. High quality mode is
-a different mode from the the regular motion video capture modes. In
+a different mode from the regular motion video capture modes. In
high quality mode:<itemizedlist>
<listitem>
<para>The driver may be able to capture higher
diff --git a/Documentation/DocBook/media/v4l/vidioc-queryctrl.xml b/Documentation/DocBook/media/v4l/vidioc-queryctrl.xml
index dc83ad70f8dc..6ec39c698baf 100644
--- a/Documentation/DocBook/media/v4l/vidioc-queryctrl.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-queryctrl.xml
@@ -616,7 +616,7 @@ pointer to memory containing the payload of the control.</entry>
<entry><constant>V4L2_CTRL_FLAG_EXECUTE_ON_WRITE</constant></entry>
<entry>0x0200</entry>
<entry>The value provided to the control will be propagated to the driver
-even if remains constant. This is required when the control represents an action
+even if it remains constant. This is required when the control represents an action
on the hardware. For example: clearing an error flag or triggering the flash. All the
controls of the type <constant>V4L2_CTRL_TYPE_BUTTON</constant> have this flag set.</entry>
</row>
diff --git a/Documentation/devicetree/bindings/media/i2c/adv7604.txt b/Documentation/devicetree/bindings/media/i2c/adv7604.txt
index c27cede3bd68..8337f75c75da 100644
--- a/Documentation/devicetree/bindings/media/i2c/adv7604.txt
+++ b/Documentation/devicetree/bindings/media/i2c/adv7604.txt
@@ -1,15 +1,17 @@
-* Analog Devices ADV7604/11 video decoder with HDMI receiver
+* Analog Devices ADV7604/11/12 video decoder with HDMI receiver
-The ADV7604 and ADV7611 are multiformat video decoders with an integrated HDMI
-receiver. The ADV7604 has four multiplexed HDMI inputs and one analog input,
-and the ADV7611 has one HDMI input and no analog input.
+The ADV7604 and ADV7611/12 are multiformat video decoders with an integrated
+HDMI receiver. The ADV7604 has four multiplexed HDMI inputs and one analog
+input, and the ADV7611 has one HDMI input and no analog input. The 7612 is
+similar to the 7611 but has 2 HDMI inputs.
-These device tree bindings support the ADV7611 only at the moment.
+These device tree bindings support the ADV7611/12 only at the moment.
Required Properties:
- compatible: Must contain one of the following
- "adi,adv7611" for the ADV7611
+ - "adi,adv7612" for the ADV7612
- reg: I2C slave address
@@ -22,10 +24,10 @@ port, in accordance with the video interface bindings defined in
Documentation/devicetree/bindings/media/video-interfaces.txt. The port nodes
are numbered as follows.
- Port ADV7611
+ Port ADV7611 ADV7612
------------------------------------------------------------
- HDMI 0
- Digital output 1
+ HDMI 0 0, 1
+ Digital output 1 2
The digital output port node must contain at least one endpoint.
@@ -45,6 +47,7 @@ Optional Endpoint Properties:
If none of hsync-active, vsync-active and pclk-sample is specified the
endpoint will use embedded BT.656 synchronization.
+ - default-input: Select which input is selected after reset.
Example:
@@ -58,6 +61,8 @@ Example:
#address-cells = <1>;
#size-cells = <0>;
+ default-input = <0>;
+
port@0 {
reg = <0>;
};
diff --git a/Documentation/devicetree/bindings/media/i2c/tc358743.txt b/Documentation/devicetree/bindings/media/i2c/tc358743.txt
new file mode 100644
index 000000000000..5218921629ed
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/tc358743.txt
@@ -0,0 +1,48 @@
+* Toshiba TC358743 HDMI-RX to MIPI CSI2-TX Bridge
+
+The Toshiba TC358743 HDMI-RX to MIPI CSI2-TX (H2C) is a bridge that converts
+a HDMI stream to MIPI CSI-2 TX. It is programmable through I2C.
+
+Required Properties:
+
+- compatible: value should be "toshiba,tc358743"
+- clocks, clock-names: should contain a phandle link to the reference clock
+ source, the clock input is named "refclk".
+
+Optional Properties:
+
+- reset-gpios: gpio phandle GPIO connected to the reset pin
+- interrupts, interrupt-parent: GPIO connected to the interrupt pin
+- data-lanes: should be <1 2 3 4> for four-lane operation,
+ or <1 2> for two-lane operation
+- clock-lanes: should be <0>
+- clock-noncontinuous: Presence of this boolean property decides whether the
+ MIPI CSI-2 clock is continuous or non-continuous.
+- link-frequencies: List of allowed link frequencies in Hz. Each frequency is
+ expressed as a 64-bit big-endian integer. The frequency
+ is half of the bps per lane due to DDR transmission.
+
+For further information on the MIPI CSI-2 endpoint node properties, see
+Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+Example:
+
+ tc358743@0f {
+ compatible = "toshiba,tc358743";
+ reg = <0x0f>;
+ clocks = <&hdmi_osc>;
+ clock-names = "refclk";
+ reset-gpios = <&gpio6 9 GPIO_ACTIVE_LOW>;
+ interrupt-parent = <&gpio2>;
+ interrupts = <5 IRQ_TYPE_LEVEL_HIGH>;
+
+ port {
+ tc358743_out: endpoint {
+ remote-endpoint = <&mipi_csi2_in>;
+ data-lanes = <1 2 3 4>;
+ clock-lanes = <0>;
+ clock-noncontinuous;
+ link-frequencies = /bits/ 64 <297000000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/renesas,jpu.txt b/Documentation/devicetree/bindings/media/renesas,jpu.txt
new file mode 100644
index 000000000000..0cb94201bf92
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/renesas,jpu.txt
@@ -0,0 +1,24 @@
+* Renesas JPEG Processing Unit
+
+The JPEG processing unit (JPU) incorporates the JPEG codec with an encoding
+and decoding function conforming to the JPEG baseline process, so that the JPU
+can encode image data and decode JPEG data quickly.
+
+Required properties:
+ - compatible: should containg one of the following:
+ - "renesas,jpu-r8a7790" for R-Car H2
+ - "renesas,jpu-r8a7791" for R-Car M2-W
+ - "renesas,jpu-r8a7792" for R-Car V2H
+ - "renesas,jpu-r8a7793" for R-Car M2-N
+
+ - reg: Base address and length of the registers block for the JPU.
+ - interrupts: JPU interrupt specifier.
+ - clocks: A phandle + clock-specifier pair for the JPU functional clock.
+
+Example: R8A7790 (R-Car H2) JPU node
+ jpeg-codec@fe980000 {
+ compatible = "renesas,jpu-r8a7790";
+ reg = <0 0xfe980000 0 0x10300>;
+ interrupts = <0 272 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp1_clks R8A7790_CLK_JPU>;
+ };
diff --git a/Documentation/devicetree/bindings/media/stih407-c8sectpfe.txt b/Documentation/devicetree/bindings/media/stih407-c8sectpfe.txt
new file mode 100644
index 000000000000..d4def767bdfe
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/stih407-c8sectpfe.txt
@@ -0,0 +1,89 @@
+STMicroelectronics STi c8sectpfe binding
+============================================
+
+This document describes the c8sectpfe device bindings that is used to get transport
+stream data into the SoC on the TS pins, and into DDR for further processing.
+
+It is typically used in conjunction with one or more demodulator and tuner devices
+which converts from the RF to digital domain. Demodulators and tuners are usually
+located on an external DVB frontend card connected to SoC TS input pins.
+
+Currently 7 TS input (tsin) channels are supported on the stih407 family SoC.
+
+Required properties (controller (parent) node):
+- compatible : Should be "stih407-c8sectpfe"
+
+- reg : Address and length of register sets for each device in
+ "reg-names"
+
+- reg-names : The names of the register addresses corresponding to the
+ registers filled in "reg":
+ - c8sectpfe: c8sectpfe registers
+ - c8sectpfe-ram: c8sectpfe internal sram
+
+- clocks : phandle list of c8sectpfe clocks
+- clock-names : should be "c8sectpfe"
+See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+- pinctrl-names : a pinctrl state named tsin%d-serial or tsin%d-parallel (where %d is tsin-num)
+ must be defined for each tsin child node.
+- pinctrl-0 : phandle referencing pin configuration for this tsin configuration
+See: Documentation/devicetree/bindings/pinctrl/pinctrl-binding.txt
+
+
+Required properties (tsin (child) node):
+
+- tsin-num : tsin id of the InputBlock (must be between 0 to 6)
+- i2c-bus : phandle to the I2C bus DT node which the demodulators & tuners on this tsin channel are connected.
+- rst-gpio : reset gpio for this tsin channel.
+
+Optional properties (tsin (child) node):
+
+- invert-ts-clk : Bool property to control sense of ts input clock (data stored on falling edge of clk).
+- serial-not-parallel : Bool property to configure input bus width (serial on ts_data<7>).
+- async-not-sync : Bool property to control if data is received in asynchronous mode
+ (all bits/bytes with ts_valid or ts_packet asserted are valid).
+
+- dvb-card : Describes the NIM card connected to this tsin channel.
+
+Example:
+
+/* stih410 SoC b2120 + b2004a + stv0367-pll(NIMB) + stv0367-tda18212 (NIMA) DT example) */
+
+ c8sectpfe@08a20000 {
+ compatible = "st,stih407-c8sectpfe";
+ status = "okay";
+ reg = <0x08a20000 0x10000>, <0x08a00000 0x4000>;
+ reg-names = "stfe", "stfe-ram";
+ interrupts = <0 34 0>, <0 35 0>;
+ interrupt-names = "stfe-error-irq", "stfe-idle-irq";
+
+ pinctrl-names = "tsin0-serial", "tsin0-parallel", "tsin3-serial",
+ "tsin4-serial", "tsin5-serial";
+
+ pinctrl-0 = <&pinctrl_tsin0_serial>;
+ pinctrl-1 = <&pinctrl_tsin0_parallel>;
+ pinctrl-2 = <&pinctrl_tsin3_serial>;
+ pinctrl-3 = <&pinctrl_tsin4_serial_alt3>;
+ pinctrl-4 = <&pinctrl_tsin5_serial_alt1>;
+
+ clocks = <&clk_s_c0_flexgen CLK_PROC_STFE>;
+ clock-names = "stfe";
+
+ /* tsin0 is TSA on NIMA */
+ tsin0: port@0 {
+ tsin-num = <0>;
+ serial-not-parallel;
+ i2c-bus = <&ssc2>;
+ rst-gpio = <&pio15 4 0>;
+ dvb-card = <STV0367_TDA18212_NIMA_1>;
+ };
+
+ tsin3: port@3 {
+ tsin-num = <3>;
+ serial-not-parallel;
+ i2c-bus = <&ssc3>;
+ rst-gpio = <&pio15 7 0>;
+ dvb-card = <STV0367_TDA18212_NIMB_1>;
+ };
+ };
diff --git a/Documentation/features/vm/TLB/arch-support.txt b/Documentation/features/vm/TLB/arch-support.txt
new file mode 100644
index 000000000000..261b92e2fb1a
--- /dev/null
+++ b/Documentation/features/vm/TLB/arch-support.txt
@@ -0,0 +1,40 @@
+#
+# Feature name: batch-unmap-tlb-flush
+# Kconfig: ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+# description: arch supports deferral of TLB flush until multiple pages are unmapped
+#
+ -----------------------
+ | arch |status|
+ -----------------------
+ | alpha: | TODO |
+ | arc: | TODO |
+ | arm: | TODO |
+ | arm64: | TODO |
+ | avr32: | .. |
+ | blackfin: | TODO |
+ | c6x: | .. |
+ | cris: | .. |
+ | frv: | .. |
+ | h8300: | .. |
+ | hexagon: | TODO |
+ | ia64: | TODO |
+ | m32r: | TODO |
+ | m68k: | .. |
+ | metag: | TODO |
+ | microblaze: | .. |
+ | mips: | TODO |
+ | mn10300: | TODO |
+ | nios2: | .. |
+ | openrisc: | .. |
+ | parisc: | TODO |
+ | powerpc: | TODO |
+ | s390: | TODO |
+ | score: | .. |
+ | sh: | TODO |
+ | sparc: | TODO |
+ | tile: | TODO |
+ | um: | .. |
+ | unicore32: | .. |
+ | x86: | ok |
+ | xtensa: | TODO |
+ -----------------------
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 6a34a0f4d37c..06d443450f21 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -397,7 +397,8 @@ prototypes:
int (*release) (struct gendisk *, fmode_t);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
- int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *);
+ int (*direct_access) (struct block_device *, sector_t, void __pmem **,
+ unsigned long *);
int (*media_changed) (struct gendisk *);
void (*unlock_native_capacity) (struct gendisk *);
int (*revalidate_disk) (struct gendisk *);
diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt
index 95c13aa575ff..906b6c233f62 100644
--- a/Documentation/filesystems/nfs/nfs-rdma.txt
+++ b/Documentation/filesystems/nfs/nfs-rdma.txt
@@ -138,9 +138,9 @@ Installation
- Build, install, reboot
The NFS/RDMA code will be enabled automatically if NFS and RDMA
- are turned on. The NFS/RDMA client and server are configured via the
- SUNRPC_XPRT_RDMA_CLIENT and SUNRPC_XPRT_RDMA_SERVER config options that both
- depend on SUNRPC and INFINIBAND. The default value of both options will be:
+ are turned on. The NFS/RDMA client and server are configured via the hidden
+ SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The
+ value of SUNRPC_XPRT_RDMA will be:
- N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client
and server will not be built
@@ -238,9 +238,8 @@ NFS/RDMA Setup
- Start the NFS server
- If the NFS/RDMA server was built as a module
- (CONFIG_SUNRPC_XPRT_RDMA_SERVER=m in kernel config), load the RDMA
- transport module:
+ If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+ kernel config), load the RDMA transport module:
$ modprobe svcrdma
@@ -259,9 +258,8 @@ NFS/RDMA Setup
- On the client system
- If the NFS/RDMA client was built as a module
- (CONFIG_SUNRPC_XPRT_RDMA_CLIENT=m in kernel config), load the RDMA client
- module:
+ If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in
+ kernel config), load the RDMA client module:
$ modprobe xprtrdma.ko
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 64df08db4657..39ac6546d4a4 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -303,6 +303,7 @@ Code Seq#(hex) Include File Comments
0xA3 80-8F Port ACL in development:
<mailto:tlewis@mindspring.com>
0xA3 90-9F linux/dtlk.h
+0xAA 00-3F linux/uapi/linux/userfaultfd.h
0xAB 00-1F linux/nbd.h
0xAC 00-1F linux/raw.h
0xAD 00 Netfilter device in development:
diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
index 6466704d47b5..0ff6a466a05b 100644
--- a/Documentation/kbuild/kbuild.txt
+++ b/Documentation/kbuild/kbuild.txt
@@ -174,6 +174,11 @@ The output directory is often set using "O=..." on the commandline.
The value can be overridden in which case the default value is ignored.
+KBUILD_SIGN_PIN
+--------------------------------------------------
+This variable allows a passphrase or PIN to be passed to the sign-file
+utility when signing kernel modules, if the private key requires such.
+
KBUILD_MODPOST_WARN
--------------------------------------------------
KBUILD_MODPOST_WARN can be set to avoid errors in case of undefined
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f2529286cd66..22a4b687ea5b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2285,6 +2285,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
The default parameter value of '0' causes the kernel
not to attempt recovery of lost locks.
+ nfs4.layoutstats_timer =
+ [NFSv4.2] Change the rate at which the kernel sends
+ layoutstats to the pNFS metadata server.
+
+ Setting this to value to 0 causes the kernel to use
+ whatever value is the default set by the layout
+ driver. A non-zero value sets the minimum interval
+ in seconds between layoutstats transmissions.
+
nfsd.nfs4_disable_idmapping=
[NFSv4] When set to the default of '1', the NFSv4
server will return only numeric uids and gids to
@@ -4097,6 +4106,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
plus one apbt timer for broadcast timer.
x86_intel_mid_timer=apbt_only | lapic_and_apbt
+ xen_512gb_limit [KNL,X86-64,XEN]
+ Restricts the kernel running paravirtualized under Xen
+ to use only up to 512 GB of RAM. The reason to do so is
+ crash analysis tools and Xen tools for doing domain
+ save/restore/migration must be enabled to handle larger
+ domains.
+
xen_emul_unplug= [HW,X86,XEN]
Unplug Xen emulated devices
Format: [unplug0,][unplug1]
diff --git a/Documentation/md-cluster.txt b/Documentation/md-cluster.txt
index de1af7db3355..1b794369e03a 100644
--- a/Documentation/md-cluster.txt
+++ b/Documentation/md-cluster.txt
@@ -91,7 +91,7 @@ The algorithm is:
this message inappropriate or redundant.
3. sender write LVB.
- sender down-convert MESSAGE from EX to CR
+ sender down-convert MESSAGE from EX to CW
sender try to get EX of ACK
[ wait until all receiver has *processed* the MESSAGE ]
@@ -112,7 +112,7 @@ The algorithm is:
sender down-convert ACK from EX to CR
sender release MESSAGE
sender release TOKEN
- receiver upconvert to EX of MESSAGE
+ receiver upconvert to PR of MESSAGE
receiver get CR of ACK
receiver release MESSAGE
diff --git a/Documentation/module-signing.txt b/Documentation/module-signing.txt
index c72702ec1ded..a78bf1ffa68c 100644
--- a/Documentation/module-signing.txt
+++ b/Documentation/module-signing.txt
@@ -89,6 +89,32 @@ This has a number of options available:
their signatures checked without causing a dependency loop.
+ (4) "File name or PKCS#11 URI of module signing key" (CONFIG_MODULE_SIG_KEY)
+
+ Setting this option to something other than its default of
+ "certs/signing_key.pem" will disable the autogeneration of signing keys
+ and allow the kernel modules to be signed with a key of your choosing.
+ The string provided should identify a file containing both a private key
+ and its corresponding X.509 certificate in PEM form, or — on systems where
+ the OpenSSL ENGINE_pkcs11 is functional — a PKCS#11 URI as defined by
+ RFC7512. In the latter case, the PKCS#11 URI should reference both a
+ certificate and a private key.
+
+ If the PEM file containing the private key is encrypted, or if the
+ PKCS#11 token requries a PIN, this can be provided at build time by
+ means of the KBUILD_SIGN_PIN variable.
+
+
+ (5) "Additional X.509 keys for default system keyring" (CONFIG_SYSTEM_TRUSTED_KEYS)
+
+ This option can be set to the filename of a PEM-encoded file containing
+ additional certificates which will be included in the system keyring by
+ default.
+
+Note that enabling module signing adds a dependency on the OpenSSL devel
+packages to the kernel build processes for the tool that does the signing.
+
+
=======================
GENERATING SIGNING KEYS
=======================
@@ -100,16 +126,16 @@ it can be deleted or stored securely. The public key gets built into the
kernel so that it can be used to check the signatures as the modules are
loaded.
-Under normal conditions, the kernel build will automatically generate a new
-keypair using openssl if one does not exist in the files:
+Under normal conditions, when CONFIG_MODULE_SIG_KEY is unchanged from its
+default, the kernel build will automatically generate a new keypair using
+openssl if one does not exist in the file:
- signing_key.priv
- signing_key.x509
+ certs/signing_key.pem
during the building of vmlinux (the public part of the key needs to be built
into vmlinux) using parameters in the:
- x509.genkey
+ certs/x509.genkey
file (which is also generated if it does not already exist).
@@ -135,8 +161,12 @@ kernel sources tree and the openssl command. The following is an example to
generate the public/private key files:
openssl req -new -nodes -utf8 -sha256 -days 36500 -batch -x509 \
- -config x509.genkey -outform DER -out signing_key.x509 \
- -keyout signing_key.priv
+ -config x509.genkey -outform PEM -out kernel_key.pem \
+ -keyout kernel_key.pem
+
+The full pathname for the resulting kernel_key.pem file can then be specified
+in the CONFIG_MODULE_SIG_KEY option, and the certificate and key therein will
+be used instead of an autogenerated keypair.
=========================
@@ -152,10 +182,9 @@ in a keyring called ".system_keyring" that can be seen by:
302d2d52 I------ 1 perm 1f010000 0 0 asymmetri Fedora kernel signing key: d69a84e6bce3d216b979e9505b3e3ef9a7118079: X509.RSA a7118079 []
...
-Beyond the public key generated specifically for module signing, any file
-placed in the kernel source root directory or the kernel build root directory
-whose name is suffixed with ".x509" will be assumed to be an X.509 public key
-and will be added to the keyring.
+Beyond the public key generated specifically for module signing, additional
+trusted certificates can be provided in a PEM-encoded file referenced by the
+CONFIG_SYSTEM_TRUSTED_KEYS configuration option.
Further, the architecture code may take public keys from a hardware store and
add those in also (e.g. from the UEFI key database).
@@ -181,7 +210,7 @@ To manually sign a module, use the scripts/sign-file tool available in
the Linux kernel source tree. The script requires 4 arguments:
1. The hash algorithm (e.g., sha256)
- 2. The private key filename
+ 2. The private key filename or PKCS#11 URI
3. The public key filename
4. The kernel module to be signed
@@ -194,6 +223,9 @@ The hash algorithm used does not have to match the one configured, but if it
doesn't, you should make sure that hash algorithm is either built into the
kernel or can be loaded without requiring itself.
+If the private key requires a passphrase or PIN, it can be provided in the
+$KBUILD_SIGN_PIN environment variable.
+
============================
SIGNED MODULES AND STRIPPING
diff --git a/Documentation/security/Smack.txt b/Documentation/security/Smack.txt
index de5e1aeca7fb..5e6d07fbed07 100644
--- a/Documentation/security/Smack.txt
+++ b/Documentation/security/Smack.txt
@@ -28,6 +28,10 @@ Smack kernels use the CIPSO IP option. Some network
configurations are intolerant of IP options and can impede
access to systems that use them as Smack does.
+Smack is used in the Tizen operating system. Please
+go to http://wiki.tizen.org for information about how
+Smack is used in Tizen.
+
The current git repository for Smack user space is:
git://github.com/smack-team/smack.git
@@ -108,6 +112,8 @@ in the smackfs filesystem. This pseudo-filesystem is mounted
on /sys/fs/smackfs.
access
+ Provided for backward compatibility. The access2 interface
+ is preferred and should be used instead.
This interface reports whether a subject with the specified
Smack label has a particular access to an object with a
specified Smack label. Write a fixed format access rule to
@@ -136,6 +142,8 @@ change-rule
those in the fourth string. If there is no such rule it will be
created using the access specified in the third and the fourth strings.
cipso
+ Provided for backward compatibility. The cipso2 interface
+ is preferred and should be used instead.
This interface allows a specific CIPSO header to be assigned
to a Smack label. The format accepted on write is:
"%24s%4d%4d"["%4d"]...
@@ -157,7 +165,19 @@ direct
doi
This contains the CIPSO domain of interpretation used in
network packets.
+ipv6host
+ This interface allows specific IPv6 internet addresses to be
+ treated as single label hosts. Packets are sent to single
+ label hosts only from processes that have Smack write access
+ to the host label. All packets received from single label hosts
+ are given the specified label. The format accepted on write is:
+ "%h:%h:%h:%h:%h:%h:%h:%h label" or
+ "%h:%h:%h:%h:%h:%h:%h:%h/%d label".
+ The "::" address shortcut is not supported.
+ If label is "-DELETE" a matched entry will be deleted.
load
+ Provided for backward compatibility. The load2 interface
+ is preferred and should be used instead.
This interface allows access control rules in addition to
the system defined rules to be specified. The format accepted
on write is:
@@ -181,6 +201,8 @@ load2
permissions that are not allowed. The string "r-x--" would
specify read and execute access.
load-self
+ Provided for backward compatibility. The load-self2 interface
+ is preferred and should be used instead.
This interface allows process specific access rules to be
defined. These rules are only consulted if access would
otherwise be permitted, and are intended to provide additional
@@ -205,6 +227,8 @@ netlabel
received from single label hosts are given the specified
label. The format accepted on write is:
"%d.%d.%d.%d label" or "%d.%d.%d.%d/%d label".
+ If the label specified is "-CIPSO" the address is treated
+ as a host that supports CIPSO headers.
onlycap
This contains labels processes must have for CAP_MAC_ADMIN
and CAP_MAC_OVERRIDE to be effective. If this file is empty
@@ -232,7 +256,8 @@ unconfined
is dangerous and can ruin the proper labeling of your system.
It should never be used in production.
-You can add access rules in /etc/smack/accesses. They take the form:
+If you are using the smackload utility
+you can add access rules in /etc/smack/accesses. They take the form:
subjectlabel objectlabel access
diff --git a/Documentation/security/Yama.txt b/Documentation/security/Yama.txt
index 227a63f018a2..d9ee7d7a6c7f 100644
--- a/Documentation/security/Yama.txt
+++ b/Documentation/security/Yama.txt
@@ -1,9 +1,7 @@
-Yama is a Linux Security Module that collects a number of system-wide DAC
-security protections that are not handled by the core kernel itself. To
-select it at boot time, specify "security=yama" (though this will disable
-any other LSM).
-
-Yama is controlled through sysctl in /proc/sys/kernel/yama:
+Yama is a Linux Security Module that collects system-wide DAC security
+protections that are not handled by the core kernel itself. This is
+selectable at build-time with CONFIG_SECURITY_YAMA, and can be controlled
+at run-time through sysctls in /proc/sys/kernel/yama:
- ptrace_scope
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 87bb4aa6a6b9..ef621d34ba5b 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -691,6 +691,8 @@ The above is mostly meaningful for kernel developers.
The marks are determined by the difference between this
current trace and the next trace.
'$' - greater than 1 second
+ '@' - greater than 100 milisecond
+ '*' - greater than 10 milisecond
'#' - greater than 1000 microsecond
'!' - greater than 100 microsecond
'+' - greater than 10 microsecond
@@ -1944,26 +1946,49 @@ want, depending on your needs.
ie:
- 0) | up_write() {
- 0) 0.646 us | _spin_lock_irqsave();
- 0) 0.684 us | _spin_unlock_irqrestore();
- 0) 3.123 us | }
- 0) 0.548 us | fput();
- 0) + 58.628 us | }
+ 3) # 1837.709 us | } /* __switch_to */
+ 3) | finish_task_switch() {
+ 3) 0.313 us | _raw_spin_unlock_irq();
+ 3) 3.177 us | }
+ 3) # 1889.063 us | } /* __schedule */
+ 3) ! 140.417 us | } /* __schedule */
+ 3) # 2034.948 us | } /* schedule */
+ 3) * 33998.59 us | } /* schedule_preempt_disabled */
[...]
- 0) | putname() {
- 0) | kmem_cache_free() {
- 0) 0.518 us | __phys_addr();
- 0) 1.757 us | }
- 0) 2.861 us | }
- 0) ! 115.305 us | }
- 0) ! 116.402 us | }
+ 1) 0.260 us | msecs_to_jiffies();
+ 1) 0.313 us | __rcu_read_unlock();
+ 1) + 61.770 us | }
+ 1) + 64.479 us | }
+ 1) 0.313 us | rcu_bh_qs();
+ 1) 0.313 us | __local_bh_enable();
+ 1) ! 217.240 us | }
+ 1) 0.365 us | idle_cpu();
+ 1) | rcu_irq_exit() {
+ 1) 0.417 us | rcu_eqs_enter_common.isra.47();
+ 1) 3.125 us | }
+ 1) ! 227.812 us | }
+ 1) ! 457.395 us | }
+ 1) @ 119760.2 us | }
+
+ [...]
+
+ 2) | handle_IPI() {
+ 1) 6.979 us | }
+ 2) 0.417 us | scheduler_ipi();
+ 1) 9.791 us | }
+ 1) + 12.917 us | }
+ 2) 3.490 us | }
+ 1) + 15.729 us | }
+ 1) + 18.542 us | }
+ 2) $ 3594274 us | }
+ means that the function exceeded 10 usecs.
! means that the function exceeded 100 usecs.
# means that the function exceeded 1000 usecs.
+ * means that the function exceeded 10 msecs.
+ @ means that the function exceeded 100 msecs.
$ means that the function exceeded 1 sec.
diff --git a/Documentation/vm/userfaultfd.txt b/Documentation/vm/userfaultfd.txt
new file mode 100644
index 000000000000..70a3c94d1941
--- /dev/null
+++ b/Documentation/vm/userfaultfd.txt
@@ -0,0 +1,144 @@
+= Userfaultfd =
+
+== Objective ==
+
+Userfaults allow the implementation of on-demand paging from userland
+and more generally they allow userland to take control of various
+memory page faults, something otherwise only the kernel code could do.
+
+For example userfaults allows a proper and more optimal implementation
+of the PROT_NONE+SIGSEGV trick.
+
+== Design ==
+
+Userfaults are delivered and resolved through the userfaultfd syscall.
+
+The userfaultfd (aside from registering and unregistering virtual
+memory ranges) provides two primary functionalities:
+
+1) read/POLLIN protocol to notify a userland thread of the faults
+ happening
+
+2) various UFFDIO_* ioctls that can manage the virtual memory regions
+ registered in the userfaultfd that allows userland to efficiently
+ resolve the userfaults it receives via 1) or to manage the virtual
+ memory in the background
+
+The real advantage of userfaults if compared to regular virtual memory
+management of mremap/mprotect is that the userfaults in all their
+operations never involve heavyweight structures like vmas (in fact the
+userfaultfd runtime load never takes the mmap_sem for writing).
+
+Vmas are not suitable for page- (or hugepage) granular fault tracking
+when dealing with virtual address spaces that could span
+Terabytes. Too many vmas would be needed for that.
+
+The userfaultfd once opened by invoking the syscall, can also be
+passed using unix domain sockets to a manager process, so the same
+manager process could handle the userfaults of a multitude of
+different processes without them being aware about what is going on
+(well of course unless they later try to use the userfaultfd
+themselves on the same region the manager is already tracking, which
+is a corner case that would currently return -EBUSY).
+
+== API ==
+
+When first opened the userfaultfd must be enabled invoking the
+UFFDIO_API ioctl specifying a uffdio_api.api value set to UFFD_API (or
+a later API version) which will specify the read/POLLIN protocol
+userland intends to speak on the UFFD and the uffdio_api.features
+userland requires. The UFFDIO_API ioctl if successful (i.e. if the
+requested uffdio_api.api is spoken also by the running kernel and the
+requested features are going to be enabled) will return into
+uffdio_api.features and uffdio_api.ioctls two 64bit bitmasks of
+respectively all the available features of the read(2) protocol and
+the generic ioctl available.
+
+Once the userfaultfd has been enabled the UFFDIO_REGISTER ioctl should
+be invoked (if present in the returned uffdio_api.ioctls bitmask) to
+register a memory range in the userfaultfd by setting the
+uffdio_register structure accordingly. The uffdio_register.mode
+bitmask will specify to the kernel which kind of faults to track for
+the range (UFFDIO_REGISTER_MODE_MISSING would track missing
+pages). The UFFDIO_REGISTER ioctl will return the
+uffdio_register.ioctls bitmask of ioctls that are suitable to resolve
+userfaults on the range registered. Not all ioctls will necessarily be
+supported for all memory types depending on the underlying virtual
+memory backend (anonymous memory vs tmpfs vs real filebacked
+mappings).
+
+Userland can use the uffdio_register.ioctls to manage the virtual
+address space in the background (to add or potentially also remove
+memory from the userfaultfd registered range). This means a userfault
+could be triggering just before userland maps in the background the
+user-faulted page.
+
+The primary ioctl to resolve userfaults is UFFDIO_COPY. That
+atomically copies a page into the userfault registered range and wakes
+up the blocked userfaults (unless uffdio_copy.mode &
+UFFDIO_COPY_MODE_DONTWAKE is set). Other ioctl works similarly to
+UFFDIO_COPY. They're atomic as in guaranteeing that nothing can see an
+half copied page since it'll keep userfaulting until the copy has
+finished.
+
+== QEMU/KVM ==
+
+QEMU/KVM is using the userfaultfd syscall to implement postcopy live
+migration. Postcopy live migration is one form of memory
+externalization consisting of a virtual machine running with part or
+all of its memory residing on a different node in the cloud. The
+userfaultfd abstraction is generic enough that not a single line of
+KVM kernel code had to be modified in order to add postcopy live
+migration to QEMU.
+
+Guest async page faults, FOLL_NOWAIT and all other GUP features work
+just fine in combination with userfaults. Userfaults trigger async
+page faults in the guest scheduler so those guest processes that
+aren't waiting for userfaults (i.e. network bound) can keep running in
+the guest vcpus.
+
+It is generally beneficial to run one pass of precopy live migration
+just before starting postcopy live migration, in order to avoid
+generating userfaults for readonly guest regions.
+
+The implementation of postcopy live migration currently uses one
+single bidirectional socket but in the future two different sockets
+will be used (to reduce the latency of the userfaults to the minimum
+possible without having to decrease /proc/sys/net/ipv4/tcp_wmem).
+
+The QEMU in the source node writes all pages that it knows are missing
+in the destination node, into the socket, and the migration thread of
+the QEMU running in the destination node runs UFFDIO_COPY|ZEROPAGE
+ioctls on the userfaultfd in order to map the received pages into the
+guest (UFFDIO_ZEROCOPY is used if the source page was a zero page).
+
+A different postcopy thread in the destination node listens with
+poll() to the userfaultfd in parallel. When a POLLIN event is
+generated after a userfault triggers, the postcopy thread read() from
+the userfaultfd and receives the fault address (or -EAGAIN in case the
+userfault was already resolved and waken by a UFFDIO_COPY|ZEROPAGE run
+by the parallel QEMU migration thread).
+
+After the QEMU postcopy thread (running in the destination node) gets
+the userfault address it writes the information about the missing page
+into the socket. The QEMU source node receives the information and
+roughly "seeks" to that page address and continues sending all
+remaining missing pages from that new page offset. Soon after that
+(just the time to flush the tcp_wmem queue through the network) the
+migration thread in the QEMU running in the destination node will
+receive the page that triggered the userfault and it'll map it as
+usual with the UFFDIO_COPY|ZEROPAGE (without actually knowing if it
+was spontaneously sent by the source or if it was an urgent page
+requested through an userfault).
+
+By the time the userfaults start, the QEMU in the destination node
+doesn't need to keep any per-page state bitmap relative to the live
+migration around and a single per-page bitmap has to be maintained in
+the QEMU running in the source node to know which pages are still
+missing in the destination node. The bitmap in the source node is
+checked to find which missing pages to send in round robin and we seek
+over it when receiving incoming userfaults. After sending each page of
+course the bitmap is updated accordingly. It's also useful to avoid
+sending the same page twice (in case the userfault is read by the
+postcopy thread just before UFFDIO_COPY|ZEROPAGE runs in the migration
+thread).