From 3f1b623a1be92103386bcab818e25885d6be9419 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 23 Oct 2020 17:00:41 +0800
Subject: vdpa: introduce config op to get valid iova range

This patch introduces a config op to get the valid iova range from the vDPA
device.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20201023090043.14430-2-jasowang@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/vdpa.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index eae0bfd87d91..30bc7a7223bb 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -52,6 +52,16 @@ struct vdpa_device {
 	int nvqs;
 };
 
+/**
+ * vDPA IOVA range - the IOVA range supported by the device
+ * @first: start of the IOVA range
+ * @last: end of the IOVA range
+ */
+struct vdpa_iova_range {
+	u64 first;
+	u64 last;
+};
+
 /**
  * vDPA_config_ops - operations for configuring a vDPA device.
  * Note: vDPA device drivers are required to implement all of the
@@ -151,6 +161,10 @@ struct vdpa_device {
  * @get_generation:		Get device config generation (optional)
  *				@vdev: vdpa device
  *				Returns u32: device generation
+ * @get_iova_range:		Get supported iova range (optional)
+ *				@vdev: vdpa device
+ *				Returns the iova range supported by
+ *				the device.
  * @set_map:			Set device memory mapping (optional)
  *				Needed for device that using device
  *				specific DMA translation (on-chip IOMMU)
@@ -216,6 +230,7 @@ struct vdpa_config_ops {
 	void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
 			   const void *buf, unsigned int len);
 	u32 (*get_generation)(struct vdpa_device *vdev);
+	struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev);
 
 	/* DMA ops */
 	int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
-- 
cgit v1.2.3


From cb47755725da7b90fecbb2aa82ac3b24a7adb89b Mon Sep 17 00:00:00 2001
From: Zeng Tao <prime.zeng@hisilicon.com>
Date: Tue, 1 Sep 2020 17:30:13 +0800
Subject: time: Prevent undefined behaviour in timespec64_to_ns()

UBSAN reports:

Undefined behaviour in ./include/linux/time64.h:127:27
signed integer overflow:
17179869187 * 1000000000 cannot be represented in type 'long long int'
Call Trace:
 timespec64_to_ns include/linux/time64.h:127 [inline]
 set_cpu_itimer+0x65c/0x880 kernel/time/itimer.c:180
 do_setitimer+0x8e/0x740 kernel/time/itimer.c:245
 __x64_sys_setitimer+0x14c/0x2c0 kernel/time/itimer.c:336
 do_syscall_64+0xa1/0x540 arch/x86/entry/common.c:295

Commit bd40a175769d ("y2038: itimer: change implementation to timespec64")
replaced the original conversion which handled time clamping correctly with
timespec64_to_ns() which has no overflow protection.

Fix it in timespec64_to_ns() as this is not necessarily limited to the
usage in itimers.

[ tglx: Added comment and adjusted the fixes tag ]

Fixes: 361a3bf00582 ("time64: Add time64.h header and define struct timespec64")
Signed-off-by: Zeng Tao <prime.zeng@hisilicon.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/1598952616-6416-1-git-send-email-prime.zeng@hisilicon.com
---
 include/linux/time64.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/time64.h b/include/linux/time64.h
index c9dcb3e5781f..5117cb5b5656 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -124,6 +124,10 @@ static inline bool timespec64_valid_settod(const struct timespec64 *ts)
  */
 static inline s64 timespec64_to_ns(const struct timespec64 *ts)
 {
+	/* Prevent multiplication overflow */
+	if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
+		return KTIME_MAX;
+
 	return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
 }
 
-- 
cgit v1.2.3


From 1c534352f47fd83eb08075ac2474f707e74bf7f7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 23 Oct 2020 17:35:19 +0200
Subject: cpufreq: Introduce CPUFREQ_NEED_UPDATE_LIMITS driver flag

Generally, a cpufreq driver may need to update some internal upper
and lower frequency boundaries on policy max and min changes,
respectively, but currently this does not work if the target
frequency does not change along with the policy limit.

Namely, if the target frequency does not change along with the
policy min or max, the "target_freq == policy->cur" check in
__cpufreq_driver_target() prevents driver callbacks from being
invoked and they do not even have a chance to update the
corresponding internal boundary.

This particularly affects the "powersave" and "performance"
governors that always set the target frequency to one of the
policy limits and it never changes when the other limit is updated.

To allow cpufreq drivers that need to update internal frequency
boundaries on policy limit changes to avoid this issue, introduce
a new driver flag, CPUFREQ_NEED_UPDATE_LIMITS, that (when set) will
neutralize the check mentioned above.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/cpufreq.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index fa37b1c66443..038ed83aab41 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -298,7 +298,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name)
 
 struct cpufreq_driver {
 	char		name[CPUFREQ_NAME_LEN];
-	u8		flags;
+	u16		flags;
 	void		*driver_data;
 
 	/* needed by all drivers */
@@ -422,6 +422,14 @@ struct cpufreq_driver {
  */
 #define CPUFREQ_IS_COOLING_DEV			BIT(7)
 
+/*
+ * Set by drivers that need to update internal upper and lower boundaries along
+ * with the target frequency and so the core and governors should also invoke
+ * the driver if the target frequency does not change, but the policy min or max
+ * may have changed.
+ */
+#define CPUFREQ_NEED_UPDATE_LIMITS		BIT(8)
+
 int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 
-- 
cgit v1.2.3


From 1de111b51b829bcf01d2e57971f8fd07a665fa3f Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Fri, 23 Oct 2020 08:47:50 -0700
Subject: KVM: arm64: ARM_SMCCC_ARCH_WORKAROUND_1 doesn't return
 SMCCC_RET_NOT_REQUIRED

According to the SMCCC spec[1](7.5.2 Discovery) the
ARM_SMCCC_ARCH_WORKAROUND_1 function id only returns 0, 1, and
SMCCC_RET_NOT_SUPPORTED.

 0 is "workaround required and safe to call this function"
 1 is "workaround not required but safe to call this function"
 SMCCC_RET_NOT_SUPPORTED is "might be vulnerable or might not be, who knows, I give up!"

SMCCC_RET_NOT_SUPPORTED might as well mean "workaround required, except
calling this function may not work because it isn't implemented in some
cases". Wonderful. We map this SMC call to

 0 is SPECTRE_MITIGATED
 1 is SPECTRE_UNAFFECTED
 SMCCC_RET_NOT_SUPPORTED is SPECTRE_VULNERABLE

For KVM hypercalls (hvc), we've implemented this function id to return
SMCCC_RET_NOT_SUPPORTED, 0, and SMCCC_RET_NOT_REQUIRED. One of those
isn't supposed to be there. Per the code we call
arm64_get_spectre_v2_state() to figure out what to return for this
feature discovery call.

 0 is SPECTRE_MITIGATED
 SMCCC_RET_NOT_REQUIRED is SPECTRE_UNAFFECTED
 SMCCC_RET_NOT_SUPPORTED is SPECTRE_VULNERABLE

Let's clean this up so that KVM tells the guest this mapping:

 0 is SPECTRE_MITIGATED
 1 is SPECTRE_UNAFFECTED
 SMCCC_RET_NOT_SUPPORTED is SPECTRE_VULNERABLE

Note: SMCCC_RET_NOT_AFFECTED is 1 but isn't part of the SMCCC spec

Fixes: c118bbb52743 ("arm64: KVM: Propagate full Spectre v2 workaround state to KVM guests")
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Acked-by: Marc Zyngier <maz@kernel.org>
Acked-by: Will Deacon <will@kernel.org>
Cc: Andre Przywara <andre.przywara@arm.com>
Cc: Steven Price <steven.price@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: stable@vger.kernel.org
Link: https://developer.arm.com/documentation/den0028/latest [1]
Link: https://lore.kernel.org/r/20201023154751.1973872-1-swboyd@chromium.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/arm-smccc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 885c9ffc835c..f860645f6512 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -87,6 +87,8 @@
 			   ARM_SMCCC_SMC_32,				\
 			   0, 0x7fff)
 
+#define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED	1
+
 /* Paravirtualised time calls (defined by ARM DEN0057A) */
 #define ARM_SMCCC_HV_PV_TIME_FEATURES				\
 	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,			\
-- 
cgit v1.2.3


From cbdc0f54560f94c2205ddbebb5464d65868af0d8 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Oct 2020 18:33:18 +0200
Subject: usb: fix kernel-doc markups

There is a common comment marked, instead, with kernel-doc
notation.

Also, some identifiers have different names between their
prototypes and the kernel-doc markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Felipe Balbi <balbi@kernel.org>
Link: https://lore.kernel.org/r/0b964be3884def04fcd20ea5c12cb90d0014871c.1603469755.git.mchehab+huawei@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/composite.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 2040696d75b6..a2d229ab63ba 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -437,7 +437,7 @@ static inline struct usb_composite_driver *to_cdriver(
 #define OS_STRING_IDX			0xEE
 
 /**
- * struct usb_composite_device - represents one composite usb gadget
+ * struct usb_composite_dev - represents one composite usb gadget
  * @gadget: read-only, abstracts the gadget's usb peripheral controller
  * @req: used for control responses; buffer is pre-allocated
  * @os_desc_req: used for OS descriptors responses; buffer is pre-allocated
-- 
cgit v1.2.3


From 13150bc5416f45234c955e5bed91623d178c6117 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 27 Oct 2020 16:11:32 +0100
Subject: module: use hidden visibility for weak symbol references

Geert reports that commit be2881824ae9eb92 ("arm64/build: Assert for
unwanted sections") results in build errors on arm64 for configurations
that have CONFIG_MODULES disabled.

The commit in question added ASSERT()s to the arm64 linker script to
ensure that linker generated sections such as .got.plt etc are empty,
but as it turns out, there are corner cases where the linker does emit
content into those sections. More specifically, weak references to
function symbols (which can remain unsatisfied, and can therefore not
be emitted as relative references) will be emitted as GOT and PLT
entries when linking the kernel in PIE mode (which is the case when
CONFIG_RELOCATABLE is enabled, which is on by default).

What happens is that code such as

	struct device *(*fn)(struct device *dev);
	struct device *iommu_device;

	fn = symbol_get(mdev_get_iommu_device);
	if (fn) {
		iommu_device = fn(dev);

essentially gets converted into the following when CONFIG_MODULES is off:

	struct device *iommu_device;

	if (&mdev_get_iommu_device) {
		iommu_device = mdev_get_iommu_device(dev);

where mdev_get_iommu_device is emitted as a weak symbol reference into
the object file. The first reference is decorated with an ordinary
ABS64 data relocation (which yields 0x0 if the reference remains
unsatisfied). However, the indirect call is turned into a direct call
covered by a R_AARCH64_CALL26 relocation, which is converted into a
call via a PLT entry taking the target address from the associated
GOT entry.

Given that such GOT and PLT entries are unnecessary for fully linked
binaries such as the kernel, let's give these weak symbol references
hidden visibility, so that the linker knows that the weak reference
via R_AARCH64_CALL26 can simply remain unsatisfied.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Fangrui Song <maskray@google.com>
Acked-by: Jessica Yu <jeyu@kernel.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lore.kernel.org/r/20201027151132.14066-1-ardb@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/module.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 7ccdf87f376f..6264617bab4d 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -740,7 +740,7 @@ static inline bool within_module(unsigned long addr, const struct module *mod)
 }
 
 /* Get/put a kernel symbol (calls should be symmetric) */
-#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak)); &(x); })
+#define symbol_get(x) ({ extern typeof(x) x __attribute__((weak,visibility("hidden"))); &(x); })
 #define symbol_put(x) do { } while (0)
 #define symbol_put_addr(x) do { } while (0)
 
-- 
cgit v1.2.3


From 6a6223ec7779dfdabb9c2567bb42079bc300cf27 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 27 Oct 2020 10:51:13 +0100
Subject: blk-mq: docs: add kernel-doc description for a new struct member

As reported by kernel-doc:
	./include/linux/blk-mq.h:267: warning: Function parameter or member 'active_queues_shared_sbitmap' not described in 'blk_mq_tag_set'

There is now a new member for struct blk_mq_tag_set. Add a
description for it, based on the commit that introduced it.

Fixes: f1b49fdc1c64 ("blk-mq: Record active_queues_shared_sbitmap per tag_set for when using shared sbitmap")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/8e513153b83eefc05e358f51f2632b592c3f6772.1603791716.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/blk-mq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b23eeca4d677..794b2a33a2c3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -235,6 +235,8 @@ enum hctx_type {
  * @flags:	   Zero or more BLK_MQ_F_* flags.
  * @driver_data:   Pointer to data owned by the block driver that created this
  *		   tag set.
+ * @active_queues_shared_sbitmap:
+ * 		   number of active request queues per tag set.
  * @__bitmap_tags: A shared tags sbitmap, used over all hctx's
  * @__breserved_tags:
  *		   A shared reserved tags sbitmap, used over all hctx's
-- 
cgit v1.2.3


From 89b422354409c275e898d26607201797cc05a932 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 27 Oct 2020 10:51:17 +0100
Subject: mm: pagemap.h: fix two kernel-doc markups

Changeset a8cf7f272b5a ("mm: add find_lock_head") renamed the
index parameter, but forgot to update the kernel-doc markups
accordingly.

Fixes: a8cf7f272b5a ("mm: add find_lock_head")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Link: https://lore.kernel.org/r/dce89b296a4f5f9f8f798d5e76b6736c14a916ac.1603791716.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/pagemap.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c77b7c31b2e4..e1e19c1f9ec9 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -344,9 +344,9 @@ static inline struct page *find_get_page_flags(struct address_space *mapping,
 /**
  * find_lock_page - locate, pin and lock a pagecache page
  * @mapping: the address_space to search
- * @offset: the page index
+ * @index: the page index
  *
- * Looks up the page cache entry at @mapping & @offset.  If there is a
+ * Looks up the page cache entry at @mapping & @index.  If there is a
  * page cache page, it is returned locked and with an increased
  * refcount.
  *
@@ -363,9 +363,9 @@ static inline struct page *find_lock_page(struct address_space *mapping,
 /**
  * find_lock_head - Locate, pin and lock a pagecache page.
  * @mapping: The address_space to search.
- * @offset: The page index.
+ * @index: The page index.
  *
- * Looks up the page cache entry at @mapping & @offset.  If there is a
+ * Looks up the page cache entry at @mapping & @index.  If there is a
  * page cache page, its head page is returned locked and with an increased
  * refcount.
  *
-- 
cgit v1.2.3


From e86c6569c588a01f20e7554cc245f8fae831957b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 27 Oct 2020 10:51:18 +0100
Subject: net: phy: remove kernel-doc duplication

Sphinx 3 now checks for duplicated function declarations:

	.../Documentation/networking/kapi:143: ../include/linux/phy.h:163: WARNING: Duplicate C declaration, also defined in 'networking/kapi'.
	Declaration is 'unsigned int phy_supported_speeds (struct phy_device *phy, unsigned int *speeds, unsigned int size)'.
	.../Documentation/networking/kapi:143: ../include/linux/phy.h:1034: WARNING: Duplicate C declaration, also defined in 'networking/kapi'.
	Declaration is 'int phy_read_mmd (struct phy_device *phydev, int devad, u32 regnum)'.
	.../Documentation/networking/kapi:143: ../include/linux/phy.h:1076: WARNING: Duplicate C declaration, also defined in 'networking/kapi'.
	Declaration is 'int __phy_read_mmd (struct phy_device *phydev, int devad, u32 regnum)'.
	.../Documentation/networking/kapi:143: ../include/linux/phy.h:1088: WARNING: Duplicate C declaration, also defined in 'networking/kapi'.
	Declaration is 'int phy_write_mmd (struct phy_device *phydev, int devad, u32 regnum, u16 val)'.
	.../Documentation/networking/kapi:143: ../include/linux/phy.h:1100: WARNING: Duplicate C declaration, also defined in 'networking/kapi'.
	Declaration is 'int __phy_write_mmd (struct phy_device *phydev, int devad, u32 regnum, u16 val)'.

It turns out that both the C and the H files have the same
kernel-doc markup for the same functions. Let's drop the one
in the header file, keeping the one closer to the code.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/75e9a357f9a716833d2094b04898754876365e68.1603791716.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/phy.h | 40 +++++-----------------------------------
 1 file changed, 5 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index eb3cb1a98b45..56563e5e0dc7 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -147,16 +147,8 @@ typedef enum {
 	PHY_INTERFACE_MODE_MAX,
 } phy_interface_t;
 
-/**
+/*
  * phy_supported_speeds - return all speeds currently supported by a PHY device
- * @phy: The PHY device to return supported speeds of.
- * @speeds: buffer to store supported speeds in.
- * @size: size of speeds buffer.
- *
- * Description: Returns the number of supported speeds, and fills
- * the speeds buffer with the supported speeds. If speeds buffer is
- * too small to contain all currently supported speeds, will return as
- * many speeds as can fit.
  */
 unsigned int phy_supported_speeds(struct phy_device *phy,
 				      unsigned int *speeds,
@@ -1022,14 +1014,9 @@ static inline int __phy_modify_changed(struct phy_device *phydev, u32 regnum,
 					regnum, mask, set);
 }
 
-/**
+/*
  * phy_read_mmd - Convenience function for reading a register
  * from an MMD on a given PHY.
- * @phydev: The phy_device struct
- * @devad: The MMD to read from
- * @regnum: The register on the MMD to read
- *
- * Same rules as for phy_read();
  */
 int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum);
 
@@ -1064,38 +1051,21 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum);
 	__ret; \
 })
 
-/**
+/*
  * __phy_read_mmd - Convenience function for reading a register
  * from an MMD on a given PHY.
- * @phydev: The phy_device struct
- * @devad: The MMD to read from
- * @regnum: The register on the MMD to read
- *
- * Same rules as for __phy_read();
  */
 int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum);
 
-/**
+/*
  * phy_write_mmd - Convenience function for writing a register
  * on an MMD on a given PHY.
- * @phydev: The phy_device struct
- * @devad: The MMD to write to
- * @regnum: The register on the MMD to read
- * @val: value to write to @regnum
- *
- * Same rules as for phy_write();
  */
 int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val);
 
-/**
+/*
  * __phy_write_mmd - Convenience function for writing a register
  * on an MMD on a given PHY.
- * @phydev: The phy_device struct
- * @devad: The MMD to write to
- * @regnum: The register on the MMD to read
- * @val: value to write to @regnum
- *
- * Same rules as for __phy_write();
  */
 int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val);
 
-- 
cgit v1.2.3


From cf38cc9f1e71151f22584c40357afaab6609384b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 27 Oct 2020 10:51:23 +0100
Subject: locking/refcount: move kernel-doc markups to the proper place

Changeset a435b9a14356 ("locking/refcount: Provide __refcount API to obtain the old value")
added a set of functions starting with __ that have a new
parameter, adding a series of new warnings:

	$ ./scripts/kernel-doc -none include/linux/refcount.h
	include/linux/refcount.h:169: warning: Function parameter or member 'oldp' not described in '__refcount_add_not_zero'
	include/linux/refcount.h:208: warning: Function parameter or member 'oldp' not described in '__refcount_add'
	include/linux/refcount.h:239: warning: Function parameter or member 'oldp' not described in '__refcount_inc_not_zero'
	include/linux/refcount.h:261: warning: Function parameter or member 'oldp' not described in '__refcount_inc'
	include/linux/refcount.h:291: warning: Function parameter or member 'oldp' not described in '__refcount_sub_and_test'
	include/linux/refcount.h:327: warning: Function parameter or member 'oldp' not described in '__refcount_dec_and_test'
	include/linux/refcount.h:347: warning: Function parameter or member 'oldp' not described in '__refcount_dec'

The issue is that the kernel-doc markups are now misplaced,
as they should be added just before the functions.

So, move the kernel-doc markups to the proper places,
in order to drop the warnings.

It should be noted that git show produces poor output for
this patch without the "--patience" flag.

Fixes: a435b9a14356 ("locking/refcount: Provide __refcount API to obtain the old value")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/7985c31d1ace591bc5e1faa05c367f1295b78afd.1603791716.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/refcount.h | 130 +++++++++++++++++++++++------------------------
 1 file changed, 65 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 7fabb1af18e0..497990c69b0b 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -147,24 +147,6 @@ static inline unsigned int refcount_read(const refcount_t *r)
 	return atomic_read(&r->refs);
 }
 
-/**
- * refcount_add_not_zero - add a value to a refcount unless it is 0
- * @i: the value to add to the refcount
- * @r: the refcount
- *
- * Will saturate at REFCOUNT_SATURATED and WARN.
- *
- * Provides no memory ordering, it is assumed the caller has guaranteed the
- * object memory to be stable (RCU, etc.). It does provide a control dependency
- * and thereby orders future stores. See the comment on top.
- *
- * Use of this function is not recommended for the normal reference counting
- * use case in which references are taken and released one at a time.  In these
- * cases, refcount_inc(), or one of its variants, should instead be used to
- * increment a reference count.
- *
- * Return: false if the passed refcount is 0, true otherwise
- */
 static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
 {
 	int old = refcount_read(r);
@@ -183,17 +165,12 @@ static inline __must_check bool __refcount_add_not_zero(int i, refcount_t *r, in
 	return old;
 }
 
-static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
-{
-	return __refcount_add_not_zero(i, r, NULL);
-}
-
 /**
- * refcount_add - add a value to a refcount
+ * refcount_add_not_zero - add a value to a refcount unless it is 0
  * @i: the value to add to the refcount
  * @r: the refcount
  *
- * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN.
+ * Will saturate at REFCOUNT_SATURATED and WARN.
  *
  * Provides no memory ordering, it is assumed the caller has guaranteed the
  * object memory to be stable (RCU, etc.). It does provide a control dependency
@@ -203,7 +180,14 @@ static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
  * use case in which references are taken and released one at a time.  In these
  * cases, refcount_inc(), or one of its variants, should instead be used to
  * increment a reference count.
+ *
+ * Return: false if the passed refcount is 0, true otherwise
  */
+static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
+{
+	return __refcount_add_not_zero(i, r, NULL);
+}
+
 static inline void __refcount_add(int i, refcount_t *r, int *oldp)
 {
 	int old = atomic_fetch_add_relaxed(i, &r->refs);
@@ -217,11 +201,32 @@ static inline void __refcount_add(int i, refcount_t *r, int *oldp)
 		refcount_warn_saturate(r, REFCOUNT_ADD_OVF);
 }
 
+/**
+ * refcount_add - add a value to a refcount
+ * @i: the value to add to the refcount
+ * @r: the refcount
+ *
+ * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN.
+ *
+ * Provides no memory ordering, it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ *
+ * Use of this function is not recommended for the normal reference counting
+ * use case in which references are taken and released one at a time.  In these
+ * cases, refcount_inc(), or one of its variants, should instead be used to
+ * increment a reference count.
+ */
 static inline void refcount_add(int i, refcount_t *r)
 {
 	__refcount_add(i, r, NULL);
 }
 
+static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp)
+{
+	return __refcount_add_not_zero(1, r, oldp);
+}
+
 /**
  * refcount_inc_not_zero - increment a refcount unless it is 0
  * @r: the refcount to increment
@@ -235,14 +240,14 @@ static inline void refcount_add(int i, refcount_t *r)
  *
  * Return: true if the increment was successful, false otherwise
  */
-static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp)
+static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
 {
-	return __refcount_add_not_zero(1, r, oldp);
+	return __refcount_inc_not_zero(r, NULL);
 }
 
-static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
+static inline void __refcount_inc(refcount_t *r, int *oldp)
 {
-	return __refcount_inc_not_zero(r, NULL);
+	__refcount_add(1, r, oldp);
 }
 
 /**
@@ -257,14 +262,27 @@ static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
  * Will WARN if the refcount is 0, as this represents a possible use-after-free
  * condition.
  */
-static inline void __refcount_inc(refcount_t *r, int *oldp)
+static inline void refcount_inc(refcount_t *r)
 {
-	__refcount_add(1, r, oldp);
+	__refcount_inc(r, NULL);
 }
 
-static inline void refcount_inc(refcount_t *r)
+static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
 {
-	__refcount_inc(r, NULL);
+	int old = atomic_fetch_sub_release(i, &r->refs);
+
+	if (oldp)
+		*oldp = old;
+
+	if (old == i) {
+		smp_acquire__after_ctrl_dep();
+		return true;
+	}
+
+	if (unlikely(old < 0 || old - i < 0))
+		refcount_warn_saturate(r, REFCOUNT_SUB_UAF);
+
+	return false;
 }
 
 /**
@@ -287,27 +305,14 @@ static inline void refcount_inc(refcount_t *r)
  *
  * Return: true if the resulting refcount is 0, false otherwise
  */
-static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
+static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
 {
-	int old = atomic_fetch_sub_release(i, &r->refs);
-
-	if (oldp)
-		*oldp = old;
-
-	if (old == i) {
-		smp_acquire__after_ctrl_dep();
-		return true;
-	}
-
-	if (unlikely(old < 0 || old - i < 0))
-		refcount_warn_saturate(r, REFCOUNT_SUB_UAF);
-
-	return false;
+	return __refcount_sub_and_test(i, r, NULL);
 }
 
-static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
+static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp)
 {
-	return __refcount_sub_and_test(i, r, NULL);
+	return __refcount_sub_and_test(1, r, oldp);
 }
 
 /**
@@ -323,26 +328,11 @@ static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
  *
  * Return: true if the resulting refcount is 0, false otherwise
  */
-static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp)
-{
-	return __refcount_sub_and_test(1, r, oldp);
-}
-
 static inline __must_check bool refcount_dec_and_test(refcount_t *r)
 {
 	return __refcount_dec_and_test(r, NULL);
 }
 
-/**
- * refcount_dec - decrement a refcount
- * @r: the refcount
- *
- * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
- * when saturated at REFCOUNT_SATURATED.
- *
- * Provides release memory ordering, such that prior loads and stores are done
- * before.
- */
 static inline void __refcount_dec(refcount_t *r, int *oldp)
 {
 	int old = atomic_fetch_sub_release(1, &r->refs);
@@ -354,6 +344,16 @@ static inline void __refcount_dec(refcount_t *r, int *oldp)
 		refcount_warn_saturate(r, REFCOUNT_DEC_LEAK);
 }
 
+/**
+ * refcount_dec - decrement a refcount
+ * @r: the refcount
+ *
+ * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
+ * when saturated at REFCOUNT_SATURATED.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before.
+ */
 static inline void refcount_dec(refcount_t *r)
 {
 	__refcount_dec(r, NULL);
-- 
cgit v1.2.3


From 80ade22c06ca115b81dd168e99479c8e09843513 Mon Sep 17 00:00:00 2001
From: Sudeep Dutt <sudeep.dutt@intel.com>
Date: Tue, 27 Oct 2020 20:14:15 -0700
Subject: misc: mic: remove the MIC drivers

This patch removes the MIC drivers from the kernel tree
since the corresponding devices have been discontinued.

Removing the dma and char-misc changes in one patch and
merging via the char-misc tree is best to avoid any
potential build breakage.

Cc: Nikhil Rao <nikhil.rao@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Acked-By: Vinod Koul <vkoul@kernel.org>
Reviewed-by: Sherry Sun <sherry.sun@nxp.com>
Link: https://lore.kernel.org/r/8c1443136563de34699d2c084df478181c205db4.1603854416.git.sudeep.dutt@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/mic_bus.h |  100 ----
 include/linux/scif.h    | 1339 -----------------------------------------------
 2 files changed, 1439 deletions(-)
 delete mode 100644 include/linux/mic_bus.h
 delete mode 100644 include/linux/scif.h

(limited to 'include/linux')

diff --git a/include/linux/mic_bus.h b/include/linux/mic_bus.h
deleted file mode 100644
index e99c789424e0..000000000000
--- a/include/linux/mic_bus.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Intel MIC Bus driver.
- *
- * This implementation is very similar to the virtio bus driver
- * implementation @ include/linux/virtio.h.
- */
-#ifndef _MIC_BUS_H_
-#define _MIC_BUS_H_
-/*
- * Everything a mbus driver needs to work with any particular mbus
- * implementation.
- */
-#include <linux/interrupt.h>
-#include <linux/dma-mapping.h>
-
-struct mbus_device_id {
-	__u32 device;
-	__u32 vendor;
-};
-
-#define MBUS_DEV_DMA_HOST 2
-#define MBUS_DEV_DMA_MIC 3
-#define MBUS_DEV_ANY_ID 0xffffffff
-
-/**
- * mbus_device - representation of a device using mbus
- * @mmio_va: virtual address of mmio space
- * @hw_ops: the hardware ops supported by this device.
- * @id: the device type identification (used to match it with a driver).
- * @dev: underlying device.
- * be used to communicate with.
- * @index: unique position on the mbus bus
- */
-struct mbus_device {
-	void __iomem *mmio_va;
-	struct mbus_hw_ops *hw_ops;
-	struct mbus_device_id id;
-	struct device dev;
-	int index;
-};
-
-/**
- * mbus_driver - operations for a mbus I/O driver
- * @driver: underlying device driver (populate name and owner).
- * @id_table: the ids serviced by this driver.
- * @probe: the function to call when a device is found.  Returns 0 or -errno.
- * @remove: the function to call when a device is removed.
- */
-struct mbus_driver {
-	struct device_driver driver;
-	const struct mbus_device_id *id_table;
-	int (*probe)(struct mbus_device *dev);
-	void (*scan)(struct mbus_device *dev);
-	void (*remove)(struct mbus_device *dev);
-};
-
-/**
- * struct mic_irq - opaque pointer used as cookie
- */
-struct mic_irq;
-
-/**
- * mbus_hw_ops - Hardware operations for accessing a MIC device on the MIC bus.
- */
-struct mbus_hw_ops {
-	struct mic_irq* (*request_threaded_irq)(struct mbus_device *mbdev,
-						irq_handler_t handler,
-						irq_handler_t thread_fn,
-						const char *name, void *data,
-						int intr_src);
-	void (*free_irq)(struct mbus_device *mbdev,
-			 struct mic_irq *cookie, void *data);
-	void (*ack_interrupt)(struct mbus_device *mbdev, int num);
-};
-
-struct mbus_device *
-mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops,
-		     struct mbus_hw_ops *hw_ops, int index,
-		     void __iomem *mmio_va);
-void mbus_unregister_device(struct mbus_device *mbdev);
-
-int mbus_register_driver(struct mbus_driver *drv);
-void mbus_unregister_driver(struct mbus_driver *drv);
-
-static inline struct mbus_device *dev_to_mbus(struct device *_dev)
-{
-	return container_of(_dev, struct mbus_device, dev);
-}
-
-static inline struct mbus_driver *drv_to_mbus(struct device_driver *drv)
-{
-	return container_of(drv, struct mbus_driver, driver);
-}
-
-#endif /* _MIC_BUS_H */
diff --git a/include/linux/scif.h b/include/linux/scif.h
deleted file mode 100644
index 329e695b8fe5..000000000000
--- a/include/linux/scif.h
+++ /dev/null
@@ -1,1339 +0,0 @@
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2014 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- *   notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- *   notice, this list of conditions and the following disclaimer in
- *   the documentation and/or other materials provided with the
- *   distribution.
- * * Neither the name of Intel Corporation nor the names of its
- *   contributors may be used to endorse or promote products derived
- *   from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Intel SCIF driver.
- *
- */
-#ifndef __SCIF_H__
-#define __SCIF_H__
-
-#include <linux/types.h>
-#include <linux/poll.h>
-#include <linux/device.h>
-#include <linux/scif_ioctl.h>
-
-#define SCIF_ACCEPT_SYNC	1
-#define SCIF_SEND_BLOCK		1
-#define SCIF_RECV_BLOCK		1
-
-enum {
-	SCIF_PROT_READ = (1 << 0),
-	SCIF_PROT_WRITE = (1 << 1)
-};
-
-enum {
-	SCIF_MAP_FIXED = 0x10,
-	SCIF_MAP_KERNEL	= 0x20,
-};
-
-enum {
-	SCIF_FENCE_INIT_SELF = (1 << 0),
-	SCIF_FENCE_INIT_PEER = (1 << 1),
-	SCIF_SIGNAL_LOCAL = (1 << 4),
-	SCIF_SIGNAL_REMOTE = (1 << 5)
-};
-
-enum {
-	SCIF_RMA_USECPU = (1 << 0),
-	SCIF_RMA_USECACHE = (1 << 1),
-	SCIF_RMA_SYNC = (1 << 2),
-	SCIF_RMA_ORDERED = (1 << 3)
-};
-
-/* End of SCIF Admin Reserved Ports */
-#define SCIF_ADMIN_PORT_END	1024
-
-/* End of SCIF Reserved Ports */
-#define SCIF_PORT_RSVD		1088
-
-typedef struct scif_endpt *scif_epd_t;
-typedef struct scif_pinned_pages *scif_pinned_pages_t;
-
-/**
- * struct scif_range - SCIF registered range used in kernel mode
- * @cookie: cookie used internally by SCIF
- * @nr_pages: number of pages of PAGE_SIZE
- * @prot_flags: R/W protection
- * @phys_addr: Array of bus addresses
- * @va: Array of kernel virtual addresses backed by the pages in the phys_addr
- *	array. The va is populated only when called on the host for a remote
- *	SCIF connection on MIC. This is required to support the use case of DMA
- *	between MIC and another device which is not a SCIF node e.g., an IB or
- *	ethernet NIC.
- */
-struct scif_range {
-	void *cookie;
-	int nr_pages;
-	int prot_flags;
-	dma_addr_t *phys_addr;
-	void __iomem **va;
-};
-
-/**
- * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll
- * @epd: SCIF endpoint
- * @events: requested events
- * @revents: returned events
- */
-struct scif_pollepd {
-	scif_epd_t epd;
-	__poll_t events;
-	__poll_t revents;
-};
-
-/**
- * scif_peer_dev - representation of a peer SCIF device
- *
- * Peer devices show up as PCIe devices for the mgmt node but not the cards.
- * The mgmt node discovers all the cards on the PCIe bus and informs the other
- * cards about their peers. Upon notification of a peer a node adds a peer
- * device to the peer bus to maintain symmetry in the way devices are
- * discovered across all nodes in the SCIF network.
- *
- * @dev: underlying device
- * @dnode - The destination node which this device will communicate with.
- */
-struct scif_peer_dev {
-	struct device dev;
-	u8 dnode;
-};
-
-/**
- * scif_client - representation of a SCIF client
- * @name: client name
- * @probe - client method called when a peer device is registered
- * @remove - client method called when a peer device is unregistered
- * @si - subsys_interface used internally for implementing SCIF clients
- */
-struct scif_client {
-	const char *name;
-	void (*probe)(struct scif_peer_dev *spdev);
-	void (*remove)(struct scif_peer_dev *spdev);
-	struct subsys_interface si;
-};
-
-#define SCIF_OPEN_FAILED ((scif_epd_t)-1)
-#define SCIF_REGISTER_FAILED ((off_t)-1)
-#define SCIF_MMAP_FAILED ((void *)-1)
-
-/**
- * scif_open() - Create an endpoint
- *
- * Return:
- * Upon successful completion, scif_open() returns an endpoint descriptor to
- * be used in subsequent SCIF functions calls to refer to that endpoint;
- * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is
- * returned and errno is set to indicate the error; in kernel mode a NULL
- * scif_epd_t is returned.
- *
- * Errors:
- * ENOMEM - Insufficient kernel memory was available
- */
-scif_epd_t scif_open(void);
-
-/**
- * scif_bind() - Bind an endpoint to a port
- * @epd:	endpoint descriptor
- * @pn:		port number
- *
- * scif_bind() binds endpoint epd to port pn, where pn is a port number on the
- * local node. If pn is zero, a port number greater than or equal to
- * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to
- * exactly one local port. Ports less than 1024 when requested can only be bound
- * by system (or root) processes or by processes executed by privileged users.
- *
- * Return:
- * Upon successful completion, scif_bind() returns the port number to which epd
- * is bound; otherwise in user mode -1 is returned and errno is set to
- * indicate the error; in kernel mode the negative of one of the following
- * errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * EINVAL - the endpoint or the port is already bound
- * EISCONN - The endpoint is already connected
- * ENOSPC - No port number available for assignment
- * EACCES - The port requested is protected and the user is not the superuser
- */
-int scif_bind(scif_epd_t epd, u16 pn);
-
-/**
- * scif_listen() - Listen for connections on an endpoint
- * @epd:	endpoint descriptor
- * @backlog:	maximum pending connection requests
- *
- * scif_listen() marks the endpoint epd as a listening endpoint - that is, as
- * an endpoint that will be used to accept incoming connection requests. Once
- * so marked, the endpoint is said to be in the listening state and may not be
- * used as the endpoint of a connection.
- *
- * The endpoint, epd, must have been bound to a port.
- *
- * The backlog argument defines the maximum length to which the queue of
- * pending connections for epd may grow. If a connection request arrives when
- * the queue is full, the client may receive an error with an indication that
- * the connection was refused.
- *
- * Return:
- * Upon successful completion, scif_listen() returns 0; otherwise in user mode
- * -1 is returned and errno is set to indicate the error; in kernel mode the
- * negative of one of the following errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * EINVAL - the endpoint is not bound to a port
- * EISCONN - The endpoint is already connected or listening
- */
-int scif_listen(scif_epd_t epd, int backlog);
-
-/**
- * scif_connect() - Initiate a connection on a port
- * @epd:	endpoint descriptor
- * @dst:	global id of port to which to connect
- *
- * The scif_connect() function requests the connection of endpoint epd to remote
- * port dst. If the connection is successful, a peer endpoint, bound to dst, is
- * created on node dst.node. On successful return, the connection is complete.
- *
- * If the endpoint epd has not already been bound to a port, scif_connect()
- * will bind it to an unused local port.
- *
- * A connection is terminated when an endpoint of the connection is closed,
- * either explicitly by scif_close(), or when a process that owns one of the
- * endpoints of the connection is terminated.
- *
- * In user space, scif_connect() supports an asynchronous connection mode
- * if the application has set the O_NONBLOCK flag on the endpoint via the
- * fcntl() system call. Setting this flag will result in the calling process
- * not to wait during scif_connect().
- *
- * Return:
- * Upon successful completion, scif_connect() returns the port ID to which the
- * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is
- * set to indicate the error; in kernel mode the negative of one of the
- * following errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNREFUSED - The destination was not listening for connections or refused
- * the connection request
- * EINVAL - dst.port is not a valid port ID
- * EISCONN - The endpoint is already connected
- * ENOMEM - No buffer space is available
- * ENODEV - The destination node does not exist, or the node is lost or existed,
- * but is not currently in the network since it may have crashed
- * ENOSPC - No port number available for assignment
- * EOPNOTSUPP - The endpoint is listening and cannot be connected
- */
-int scif_connect(scif_epd_t epd, struct scif_port_id *dst);
-
-/**
- * scif_accept() - Accept a connection on an endpoint
- * @epd:	endpoint descriptor
- * @peer:	global id of port to which connected
- * @newepd:	new connected endpoint descriptor
- * @flags:	flags
- *
- * The scif_accept() call extracts the first connection request from the queue
- * of pending connections for the port on which epd is listening. scif_accept()
- * creates a new endpoint, bound to the same port as epd, and allocates a new
- * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new
- * endpoint is connected to the endpoint through which the connection was
- * requested. epd is unaffected by this call, and remains in the listening
- * state.
- *
- * On successful return, peer holds the global port identifier (node id and
- * local port number) of the port which requested the connection.
- *
- * A connection is terminated when an endpoint of the connection is closed,
- * either explicitly by scif_close(), or when a process that owns one of the
- * endpoints of the connection is terminated.
- *
- * The number of connections that can (subsequently) be accepted on epd is only
- * limited by system resources (memory).
- *
- * The flags argument is formed by OR'ing together zero or more of the
- * following values.
- * SCIF_ACCEPT_SYNC - block until a connection request is presented. If
- *			SCIF_ACCEPT_SYNC is not in flags, and no pending
- *			connections are present on the queue, scif_accept()
- *			fails with an EAGAIN error
- *
- * In user mode, the select() and poll() functions can be used to determine
- * when there is a connection request. In kernel mode, the scif_poll()
- * function may be used for this purpose. A readable event will be delivered
- * when a connection is requested.
- *
- * Return:
- * Upon successful completion, scif_accept() returns 0; otherwise in user mode
- * -1 is returned and errno is set to indicate the error; in kernel mode the
- *	negative of one of the following errors is returned.
- *
- * Errors:
- * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be
- * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete
- * its connection request
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * EINTR - Interrupted function
- * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is
- * NULL, or newepd is NULL
- * ENODEV - The requesting node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOMEM - Not enough space
- * ENOENT - Secondary part of epd registration failed
- */
-int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t
-		*newepd, int flags);
-
-/**
- * scif_close() - Close an endpoint
- * @epd:	endpoint descriptor
- *
- * scif_close() closes an endpoint and performs necessary teardown of
- * facilities associated with that endpoint.
- *
- * If epd is a listening endpoint then it will no longer accept connection
- * requests on the port to which it is bound. Any pending connection requests
- * are rejected.
- *
- * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs
- * which are in-process through epd or its peer endpoint will complete before
- * scif_close() returns. Registered windows of the local and peer endpoints are
- * released as if scif_unregister() was called against each window.
- *
- * Closing a SCIF endpoint does not affect local registered memory mapped by
- * a SCIF endpoint on a remote node. The local memory remains mapped by the peer
- * SCIF endpoint explicitly removed by calling munmap(..) by the peer.
- *
- * If the peer endpoint's receive queue is not empty at the time that epd is
- * closed, then the peer endpoint can be passed as the endpoint parameter to
- * scif_recv() until the receive queue is empty.
- *
- * epd is freed and may no longer be accessed.
- *
- * Return:
- * Upon successful completion, scif_close() returns 0; otherwise in user mode
- * -1 is returned and errno is set to indicate the error; in kernel mode the
- * negative of one of the following errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- */
-int scif_close(scif_epd_t epd);
-
-/**
- * scif_send() - Send a message
- * @epd:	endpoint descriptor
- * @msg:	message buffer address
- * @len:	message length
- * @flags:	blocking mode flags
- *
- * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data
- * are copied from memory starting at address msg. On successful execution the
- * return value of scif_send() is the number of bytes that were sent, and is
- * zero if no bytes were sent because len was zero. scif_send() may be called
- * only when the endpoint is in a connected state.
- *
- * If a scif_send() call is non-blocking, then it sends only those bytes which
- * can be sent without waiting, up to a maximum of len bytes.
- *
- * If a scif_send() call is blocking, then it normally returns after sending
- * all len bytes. If a blocking call is interrupted or the connection is
- * reset, the call is considered successful if some bytes were sent or len is
- * zero, otherwise the call is considered unsuccessful.
- *
- * In user mode, the select() and poll() functions can be used to determine
- * when the send queue is not full. In kernel mode, the scif_poll() function
- * may be used for this purpose.
- *
- * It is recommended that scif_send()/scif_recv() only be used for short
- * control-type message communication between SCIF endpoints. The SCIF RMA
- * APIs are expected to provide better performance for transfer sizes of
- * 1024 bytes or longer for the current MIC hardware and software
- * implementation.
- *
- * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK
- * is passed as the flags argument.
- *
- * Return:
- * Upon successful completion, scif_send() returns the number of bytes sent;
- * otherwise in user mode -1 is returned and errno is set to indicate the
- * error; in kernel mode the negative of one of the following errors is
- * returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - flags is invalid, or len is negative
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOMEM - Not enough space
- * ENOTCONN - The endpoint is not connected
- */
-int scif_send(scif_epd_t epd, void *msg, int len, int flags);
-
-/**
- * scif_recv() - Receive a message
- * @epd:	endpoint descriptor
- * @msg:	message buffer address
- * @len:	message buffer length
- * @flags:	blocking mode flags
- *
- * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of
- * data are copied to memory starting at address msg. On successful execution
- * the return value of scif_recv() is the number of bytes that were received,
- * and is zero if no bytes were received because len was zero. scif_recv() may
- * be called only when the endpoint is in a connected state.
- *
- * If a scif_recv() call is non-blocking, then it receives only those bytes
- * which can be received without waiting, up to a maximum of len bytes.
- *
- * If a scif_recv() call is blocking, then it normally returns after receiving
- * all len bytes. If the blocking call was interrupted due to a disconnection,
- * subsequent calls to scif_recv() will copy all bytes received upto the point
- * of disconnection.
- *
- * In user mode, the select() and poll() functions can be used to determine
- * when data is available to be received. In kernel mode, the scif_poll()
- * function may be used for this purpose.
- *
- * It is recommended that scif_send()/scif_recv() only be used for short
- * control-type message communication between SCIF endpoints. The SCIF RMA
- * APIs are expected to provide better performance for transfer sizes of
- * 1024 bytes or longer for the current MIC hardware and software
- * implementation.
- *
- * scif_recv() will block until the entire message is received if
- * SCIF_RECV_BLOCK is passed as the flags argument.
- *
- * Return:
- * Upon successful completion, scif_recv() returns the number of bytes
- * received; otherwise in user mode -1 is returned and errno is set to
- * indicate the error; in kernel mode the negative of one of the following
- * errors is returned.
- *
- * Errors:
- * EAGAIN - The destination node is returning from a low power state
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - flags is invalid, or len is negative
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOMEM - Not enough space
- * ENOTCONN - The endpoint is not connected
- */
-int scif_recv(scif_epd_t epd, void *msg, int len, int flags);
-
-/**
- * scif_register() - Mark a memory region for remote access.
- * @epd:		endpoint descriptor
- * @addr:		starting virtual address
- * @len:		length of range
- * @offset:		offset of window
- * @prot_flags:		read/write protection flags
- * @map_flags:		mapping flags
- *
- * The scif_register() function opens a window, a range of whole pages of the
- * registered address space of the endpoint epd, starting at offset po and
- * continuing for len bytes. The value of po, further described below, is a
- * function of the parameters offset and len, and the value of map_flags. Each
- * page of the window represents the physical memory page which backs the
- * corresponding page of the range of virtual address pages starting at addr
- * and continuing for len bytes. addr and len are constrained to be multiples
- * of the page size. A successful scif_register() call returns po.
- *
- * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
- * exactly, and offset is constrained to be a multiple of the page size. The
- * mapping established by scif_register() will not replace any existing
- * registration; an error is returned if any page within the range [offset,
- * offset + len - 1] intersects an existing window.
- *
- * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
- * implementation-defined manner to arrive at po. The po value so chosen will
- * be an area of the registered address space that the implementation deems
- * suitable for a mapping of len bytes. An offset value of 0 is interpreted as
- * granting the implementation complete freedom in selecting po, subject to
- * constraints described below. A non-zero value of offset is taken to be a
- * suggestion of an offset near which the mapping should be placed. When the
- * implementation selects a value for po, it does not replace any extant
- * window. In all cases, po will be a multiple of the page size.
- *
- * The physical pages which are so represented by a window are available for
- * access in calls to mmap(), scif_readfrom(), scif_writeto(),
- * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
- * physical pages represented by the window will not be reused by the memory
- * subsystem for any other purpose. Note that the same physical page may be
- * represented by multiple windows.
- *
- * Subsequent operations which change the memory pages to which virtual
- * addresses are mapped (such as mmap(), munmap()) have no effect on
- * existing window.
- *
- * If the process will fork(), it is recommended that the registered
- * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
- * problems due to copy-on-write semantics.
- *
- * The prot_flags argument is formed by OR'ing together one or more of the
- * following values.
- * SCIF_PROT_READ - allow read operations from the window
- * SCIF_PROT_WRITE - allow write operations to the window
- *
- * Return:
- * Upon successful completion, scif_register() returns the offset at which the
- * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that
- * is (off_t *)-1) is returned and errno is set to indicate the error; in
- * kernel mode the negative of one of the following errors is returned.
- *
- * Errors:
- * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range
- * [offset, offset + len -1] are already registered
- * EAGAIN - The mapping could not be performed due to lack of resources
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is
- * set in flags, and offset is not a multiple of the page size, or addr is not a
- * multiple of the page size, or len is not a multiple of the page size, or is
- * 0, or offset is negative
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOMEM - Not enough space
- * ENOTCONN -The endpoint is not connected
- */
-off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
-		    int prot_flags, int map_flags);
-
-/**
- * scif_unregister() - Mark a memory region for remote access.
- * @epd:	endpoint descriptor
- * @offset:	start of range to unregister
- * @len:	length of range to unregister
- *
- * The scif_unregister() function closes those previously registered windows
- * which are entirely within the range [offset, offset + len - 1]. It is an
- * error to specify a range which intersects only a subrange of a window.
- *
- * On a successful return, pages within the window may no longer be specified
- * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(),
- * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window,
- * however, continues to exist until all previous references against it are
- * removed. A window is referenced if there is a mapping to it created by
- * mmap(), or if scif_get_pages() was called against the window
- * (and the pages have not been returned via scif_put_pages()). A window is
- * also referenced while an RMA, in which some range of the window is a source
- * or destination, is in progress. Finally a window is referenced while some
- * offset in that window was specified to scif_fence_signal(), and the RMAs
- * marked by that call to scif_fence_signal() have not completed. While a
- * window is in this state, its registered address space pages are not
- * available for use in a new registered window.
- *
- * When all such references to the window have been removed, its references to
- * all the physical pages which it represents are removed. Similarly, the
- * registered address space pages of the window become available for
- * registration in a new window.
- *
- * Return:
- * Upon successful completion, scif_unregister() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned. In the event of an
- * error, no windows are unregistered.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a
- * window, or offset is negative
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the
- * registered address space of epd
- */
-int scif_unregister(scif_epd_t epd, off_t offset, size_t len);
-
-/**
- * scif_readfrom() - Copy from a remote address space
- * @epd:	endpoint descriptor
- * @loffset:	offset in local registered address space to
- *		which to copy
- * @len:	length of range to copy
- * @roffset:	offset in remote registered address space
- *		from which to copy
- * @rma_flags:	transfer mode flags
- *
- * scif_readfrom() copies len bytes from the remote registered address space of
- * the peer of endpoint epd, starting at the offset roffset to the local
- * registered address space of epd, starting at the offset loffset.
- *
- * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
- * roffset + len - 1] must be within some registered window or windows of the
- * local and remote nodes. A range may intersect multiple registered windows,
- * but only if those windows are contiguous in the registered address space.
- *
- * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
- * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
- * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the
- * transfer is complete. Otherwise, the transfer may be performed asynchron-
- * ously. The order in which any two asynchronous RMA operations complete
- * is non-deterministic. The synchronization functions, scif_fence_mark()/
- * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
- * the completion of asynchronous RMA operations on the same endpoint.
- *
- * The DMA transfer of individual bytes is not guaranteed to complete in
- * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
- * cacheline or partial cacheline of the source range will become visible on
- * the destination node after all other transferred data in the source
- * range has become visible on the destination node.
- *
- * The optimal DMA performance will likely be realized if both
- * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
- * performance will likely be realized if loffset and roffset are not
- * cacheline aligned but are separated by some multiple of 64. The lowest level
- * of performance is likely if loffset and roffset are not separated by a
- * multiple of 64.
- *
- * The rma_flags argument is formed by ORing together zero or more of the
- * following values.
- * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
- *	engine.
- * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
- *		transfer has completed. Passing this flag results in the
- *		current implementation busy waiting and consuming CPU cycles
- *		while the DMA transfer is in progress for best performance by
- *		avoiding the interrupt latency.
- * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
- *		the source range becomes visible on the destination node
- *		after all other transferred data in the source range has
- *		become visible on the destination
- *
- * Return:
- * Upon successful completion, scif_readfrom() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned.
- *
- * Errors:
- * EACCES - Attempt to write to a read-only range
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - rma_flags is invalid
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
- * address space of epd, or, The range [roffset, roffset + len - 1] is invalid
- * for the registered address space of the peer of epd, or loffset or roffset
- * is negative
- */
-int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t
-		  roffset, int rma_flags);
-
-/**
- * scif_writeto() - Copy to a remote address space
- * @epd:	endpoint descriptor
- * @loffset:	offset in local registered address space
- *		from which to copy
- * @len:	length of range to copy
- * @roffset:	offset in remote registered address space to
- *		which to copy
- * @rma_flags:	transfer mode flags
- *
- * scif_writeto() copies len bytes from the local registered address space of
- * epd, starting at the offset loffset to the remote registered address space
- * of the peer of endpoint epd, starting at the offset roffset.
- *
- * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
- * roffset + len - 1] must be within some registered window or windows of the
- * local and remote nodes. A range may intersect multiple registered windows,
- * but only if those windows are contiguous in the registered address space.
- *
- * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
- * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
- * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the
- * transfer is complete. Otherwise, the transfer may be performed asynchron-
- * ously. The order in which any two asynchronous RMA operations complete
- * is non-deterministic. The synchronization functions, scif_fence_mark()/
- * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
- * the completion of asynchronous RMA operations on the same endpoint.
- *
- * The DMA transfer of individual bytes is not guaranteed to complete in
- * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
- * cacheline or partial cacheline of the source range will become visible on
- * the destination node after all other transferred data in the source
- * range has become visible on the destination node.
- *
- * The optimal DMA performance will likely be realized if both
- * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
- * performance will likely be realized if loffset and roffset are not cacheline
- * aligned but are separated by some multiple of 64. The lowest level of
- * performance is likely if loffset and roffset are not separated by a multiple
- * of 64.
- *
- * The rma_flags argument is formed by ORing together zero or more of the
- * following values.
- * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
- *			engine.
- * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
- *		transfer has completed. Passing this flag results in the
- *		current implementation busy waiting and consuming CPU cycles
- *		while the DMA transfer is in progress for best performance by
- *		avoiding the interrupt latency.
- * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
- *		the source range becomes visible on the destination node
- *		after all other transferred data in the source range has
- *		become visible on the destination
- *
- * Return:
- * Upon successful completion, scif_readfrom() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned.
- *
- * Errors:
- * EACCES - Attempt to write to a read-only range
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - rma_flags is invalid
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
- * address space of epd, or, The range [roffset , roffset + len -1] is invalid
- * for the registered address space of the peer of epd, or loffset or roffset
- * is negative
- */
-int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t
-		 roffset, int rma_flags);
-
-/**
- * scif_vreadfrom() - Copy from a remote address space
- * @epd:	endpoint descriptor
- * @addr:	address to which to copy
- * @len:	length of range to copy
- * @roffset:	offset in remote registered address space
- *		from which to copy
- * @rma_flags:	transfer mode flags
- *
- * scif_vreadfrom() copies len bytes from the remote registered address
- * space of the peer of endpoint epd, starting at the offset roffset, to local
- * memory, starting at addr.
- *
- * The specified range [roffset, roffset + len - 1] must be within some
- * registered window or windows of the remote nodes. The range may
- * intersect multiple registered windows, but only if those windows are
- * contiguous in the registered address space.
- *
- * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
- * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
- * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the
- * transfer is complete. Otherwise, the transfer may be performed asynchron-
- * ously. The order in which any two asynchronous RMA operations complete
- * is non-deterministic. The synchronization functions, scif_fence_mark()/
- * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
- * the completion of asynchronous RMA operations on the same endpoint.
- *
- * The DMA transfer of individual bytes is not guaranteed to complete in
- * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
- * cacheline or partial cacheline of the source range will become visible on
- * the destination node after all other transferred data in the source
- * range has become visible on the destination node.
- *
- * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
- * the specified local memory range may be remain in a pinned state even after
- * the specified transfer completes. This may reduce overhead if some or all of
- * the same virtual address range is referenced in a subsequent call of
- * scif_vreadfrom() or scif_vwriteto().
- *
- * The optimal DMA performance will likely be realized if both
- * addr and roffset are cacheline aligned (are a multiple of 64). Lower
- * performance will likely be realized if addr and roffset are not
- * cacheline aligned but are separated by some multiple of 64. The lowest level
- * of performance is likely if addr and roffset are not separated by a
- * multiple of 64.
- *
- * The rma_flags argument is formed by ORing together zero or more of the
- * following values.
- * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
- *	engine.
- * SCIF_RMA_USECACHE - enable registration caching
- * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
- *		transfer has completed. Passing this flag results in the
- *		current implementation busy waiting and consuming CPU cycles
- *		while the DMA transfer is in progress for best performance by
- *		avoiding the interrupt latency.
- * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
- *	the source range becomes visible on the destination node
- *	after all other transferred data in the source range has
- *	become visible on the destination
- *
- * Return:
- * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned.
- *
- * Errors:
- * EACCES - Attempt to write to a read-only range
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - rma_flags is invalid
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
- * registered address space of epd
- */
-int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset,
-		   int rma_flags);
-
-/**
- * scif_vwriteto() - Copy to a remote address space
- * @epd:	endpoint descriptor
- * @addr:	address from which to copy
- * @len:	length of range to copy
- * @roffset:	offset in remote registered address space to
- *		which to copy
- * @rma_flags:	transfer mode flags
- *
- * scif_vwriteto() copies len bytes from the local memory, starting at addr, to
- * the remote registered address space of the peer of endpoint epd, starting at
- * the offset roffset.
- *
- * The specified range [roffset, roffset + len - 1] must be within some
- * registered window or windows of the remote nodes. The range may intersect
- * multiple registered windows, but only if those windows are contiguous in the
- * registered address space.
- *
- * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
- * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
- * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the
- * transfer is complete. Otherwise, the transfer may be performed asynchron-
- * ously. The order in which any two asynchronous RMA operations complete
- * is non-deterministic. The synchronization functions, scif_fence_mark()/
- * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
- * the completion of asynchronous RMA operations on the same endpoint.
- *
- * The DMA transfer of individual bytes is not guaranteed to complete in
- * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
- * cacheline or partial cacheline of the source range will become visible on
- * the destination node after all other transferred data in the source
- * range has become visible on the destination node.
- *
- * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
- * the specified local memory range may be remain in a pinned state even after
- * the specified transfer completes. This may reduce overhead if some or all of
- * the same virtual address range is referenced in a subsequent call of
- * scif_vreadfrom() or scif_vwriteto().
- *
- * The optimal DMA performance will likely be realized if both
- * addr and offset are cacheline aligned (are a multiple of 64). Lower
- * performance will likely be realized if addr and offset are not cacheline
- * aligned but are separated by some multiple of 64. The lowest level of
- * performance is likely if addr and offset are not separated by a multiple of
- * 64.
- *
- * The rma_flags argument is formed by ORing together zero or more of the
- * following values.
- * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
- *	engine.
- * SCIF_RMA_USECACHE - allow registration caching
- * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
- *		transfer has completed. Passing this flag results in the
- *		current implementation busy waiting and consuming CPU cycles
- *		while the DMA transfer is in progress for best performance by
- *		avoiding the interrupt latency.
- * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
- *		the source range becomes visible on the destination node
- *		after all other transferred data in the source range has
- *		become visible on the destination
- *
- * Return:
- * Upon successful completion, scif_vwriteto() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned.
- *
- * Errors:
- * EACCES - Attempt to write to a read-only range
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - rma_flags is invalid
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
- * registered address space of epd
- */
-int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset,
-		  int rma_flags);
-
-/**
- * scif_fence_mark() - Mark previously issued RMAs
- * @epd:	endpoint descriptor
- * @flags:	control flags
- * @mark:	marked value returned as output.
- *
- * scif_fence_mark() returns after marking the current set of all uncompleted
- * RMAs initiated through the endpoint epd or the current set of all
- * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are
- * marked with a value returned at mark. The application may subsequently call
- * scif_fence_wait(), passing the value returned at mark, to await completion
- * of all RMAs so marked.
- *
- * The flags argument has exactly one of the following values.
- * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
- *	epd are marked
- * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
- *	of endpoint epd are marked
- *
- * Return:
- * Upon successful completion, scif_fence_mark() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - flags is invalid
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENOMEM - Insufficient kernel memory was available
- */
-int scif_fence_mark(scif_epd_t epd, int flags, int *mark);
-
-/**
- * scif_fence_wait() - Wait for completion of marked RMAs
- * @epd:	endpoint descriptor
- * @mark:	mark request
- *
- * scif_fence_wait() returns after all RMAs marked with mark have completed.
- * The value passed in mark must have been obtained in a previous call to
- * scif_fence_mark().
- *
- * Return:
- * Upon successful completion, scif_fence_wait() returns 0; otherwise in user
- * mode -1 is returned and errno is set to indicate the error; in kernel mode
- * the negative of one of the following errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENOMEM - Insufficient kernel memory was available
- */
-int scif_fence_wait(scif_epd_t epd, int mark);
-
-/**
- * scif_fence_signal() - Request a memory update on completion of RMAs
- * @epd:	endpoint descriptor
- * @loff:	local offset
- * @lval:	local value to write to loffset
- * @roff:	remote offset
- * @rval:	remote value to write to roffset
- * @flags:	flags
- *
- * scif_fence_signal() returns after marking the current set of all uncompleted
- * RMAs initiated through the endpoint epd or marking the current set of all
- * uncompleted RMAs initiated through the peer of endpoint epd.
- *
- * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the
- * marked set, lval is written to memory at the address corresponding to offset
- * loff in the local registered address space of epd. loff must be within a
- * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion
- * of the RMAs in the marked set, rval is written to memory at the address
- * corresponding to offset roff in the remote registered address space of epd.
- * roff must be within a remote registered window of the peer of epd. Note
- * that any specified offset must be DWORD (4 byte / 32 bit) aligned.
- *
- * The flags argument is formed by OR'ing together the following.
- * Exactly one of the following values.
- * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
- *	epd are marked
- * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
- *	of endpoint epd are marked
- * One or more of the following values.
- * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to
- *	memory at the address corresponding to offset loff in the local
- *	registered address space of epd.
- * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to
- *	memory at the address corresponding to offset roff in the remote
- *	registered address space of epd.
- *
- * Return:
- * Upon successful completion, scif_fence_signal() returns 0; otherwise in
- * user mode -1 is returned and errno is set to indicate the error; in kernel
- * mode the negative of one of the following errors is returned.
- *
- * Errors:
- * EBADF, ENOTTY - epd is not a valid endpoint descriptor
- * ECONNRESET - Connection reset by peer
- * EINVAL - flags is invalid, or loff or roff are not DWORD aligned
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - loff is invalid for the registered address of epd, or roff is invalid
- * for the registered address space, of the peer of epd
- */
-int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff,
-		      u64 rval, int flags);
-
-/**
- * scif_get_node_ids() - Return information about online nodes
- * @nodes:	array in which to return online node IDs
- * @len:	number of entries in the nodes array
- * @self:	address to place the node ID of the local node
- *
- * scif_get_node_ids() fills in the nodes array with up to len node IDs of the
- * nodes in the SCIF network. If there is not enough space in nodes, as
- * indicated by the len parameter, only len node IDs are returned in nodes. The
- * return value of scif_get_node_ids() is the total number of nodes currently in
- * the SCIF network. By checking the return value against the len parameter,
- * the user may determine if enough space for nodes was allocated.
- *
- * The node ID of the local node is returned at self.
- *
- * Return:
- * Upon successful completion, scif_get_node_ids() returns the actual number of
- * online nodes in the SCIF network including 'self'; otherwise in user mode
- * -1 is returned and errno is set to indicate the error; in kernel mode no
- * errors are returned.
- */
-int scif_get_node_ids(u16 *nodes, int len, u16 *self);
-
-/**
- * scif_pin_pages() - Pin a set of pages
- * @addr:		Virtual address of range to pin
- * @len:		Length of range to pin
- * @prot_flags:		Page protection flags
- * @map_flags:		Page classification flags
- * @pinned_pages:	Handle to pinned pages
- *
- * scif_pin_pages() pins (locks in physical memory) the physical pages which
- * back the range of virtual address pages starting at addr and continuing for
- * len bytes. addr and len are constrained to be multiples of the page size. A
- * successful scif_pin_pages() call returns a handle to pinned_pages which may
- * be used in subsequent calls to scif_register_pinned_pages().
- *
- * The pages will remain pinned as long as there is a reference against the
- * scif_pinned_pages_t value returned by scif_pin_pages() and until
- * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A
- * reference is added to a scif_pinned_pages_t value each time a window is
- * created by calling scif_register_pinned_pages() and passing the
- * scif_pinned_pages_t value. A reference is removed from a
- * scif_pinned_pages_t value each time such a window is deleted.
- *
- * Subsequent operations which change the memory pages to which virtual
- * addresses are mapped (such as mmap(), munmap()) have no effect on the
- * scif_pinned_pages_t value or windows created against it.
- *
- * If the process will fork(), it is recommended that the registered
- * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
- * problems due to copy-on-write semantics.
- *
- * The prot_flags argument is formed by OR'ing together one or more of the
- * following values.
- * SCIF_PROT_READ - allow read operations against the pages
- * SCIF_PROT_WRITE - allow write operations against the pages
- * The map_flags argument can be set as SCIF_MAP_KERNEL to interpret addr as a
- * kernel space address. By default, addr is interpreted as a user space
- * address.
- *
- * Return:
- * Upon successful completion, scif_pin_pages() returns 0; otherwise the
- * negative of one of the following errors is returned.
- *
- * Errors:
- * EINVAL - prot_flags is invalid, map_flags is invalid, or offset is negative
- * ENOMEM - Not enough space
- */
-int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags,
-		   scif_pinned_pages_t *pinned_pages);
-
-/**
- * scif_unpin_pages() - Unpin a set of pages
- * @pinned_pages:	Handle to pinned pages to be unpinned
- *
- * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new
- * windows against pinned_pages. The physical pages represented by pinned_pages
- * will remain pinned until all windows previously registered against
- * pinned_pages are deleted (the window is scif_unregister()'d and all
- * references to the window are removed (see scif_unregister()).
- *
- * pinned_pages must have been obtain from a previous call to scif_pin_pages().
- * After calling scif_unpin_pages(), it is an error to pass pinned_pages to
- * scif_register_pinned_pages().
- *
- * Return:
- * Upon successful completion, scif_unpin_pages() returns 0; otherwise the
- * negative of one of the following errors is returned.
- *
- * Errors:
- * EINVAL - pinned_pages is not valid
- */
-int scif_unpin_pages(scif_pinned_pages_t pinned_pages);
-
-/**
- * scif_register_pinned_pages() - Mark a memory region for remote access.
- * @epd:		endpoint descriptor
- * @pinned_pages:	Handle to pinned pages
- * @offset:		Registered address space offset
- * @map_flags:		Flags which control where pages are mapped
- *
- * The scif_register_pinned_pages() function opens a window, a range of whole
- * pages of the registered address space of the endpoint epd, starting at
- * offset po. The value of po, further described below, is a function of the
- * parameters offset and pinned_pages, and the value of map_flags. Each page of
- * the window represents a corresponding physical memory page of the range
- * represented by pinned_pages; the length of the window is the same as the
- * length of range represented by pinned_pages. A successful
- * scif_register_pinned_pages() call returns po as the return value.
- *
- * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
- * exactly, and offset is constrained to be a multiple of the page size. The
- * mapping established by scif_register_pinned_pages() will not replace any
- * existing registration; an error is returned if any page of the new window
- * would intersect an existing window.
- *
- * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
- * implementation-defined manner to arrive at po. The po so chosen will be an
- * area of the registered address space that the implementation deems suitable
- * for a mapping of the required size. An offset value of 0 is interpreted as
- * granting the implementation complete freedom in selecting po, subject to
- * constraints described below. A non-zero value of offset is taken to be a
- * suggestion of an offset near which the mapping should be placed. When the
- * implementation selects a value for po, it does not replace any extant
- * window. In all cases, po will be a multiple of the page size.
- *
- * The physical pages which are so represented by a window are available for
- * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(),
- * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
- * physical pages represented by the window will not be reused by the memory
- * subsystem for any other purpose. Note that the same physical page may be
- * represented by multiple windows.
- *
- * Windows created by scif_register_pinned_pages() are unregistered by
- * scif_unregister().
- *
- * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a
- * fixed offset.
- *
- * Return:
- * Upon successful completion, scif_register_pinned_pages() returns the offset
- * at which the mapping was placed (po); otherwise the negative of one of the
- * following errors is returned.
- *
- * Errors:
- * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window
- * would intersect an existing window
- * EAGAIN - The mapping could not be performed due to lack of resources
- * ECONNRESET - Connection reset by peer
- * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and
- * offset is not a multiple of the page size, or offset is negative
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOMEM - Not enough space
- * ENOTCONN - The endpoint is not connected
- */
-off_t scif_register_pinned_pages(scif_epd_t epd,
-				 scif_pinned_pages_t pinned_pages,
-				 off_t offset, int map_flags);
-
-/**
- * scif_get_pages() - Add references to remote registered pages
- * @epd:	endpoint descriptor
- * @offset:	remote registered offset
- * @len:	length of range of pages
- * @pages:	returned scif_range structure
- *
- * scif_get_pages() returns the addresses of the physical pages represented by
- * those pages of the registered address space of the peer of epd, starting at
- * offset and continuing for len bytes. offset and len are constrained to be
- * multiples of the page size.
- *
- * All of the pages in the specified range [offset, offset + len - 1] must be
- * within a single window of the registered address space of the peer of epd.
- *
- * The addresses are returned as a virtually contiguous array pointed to by the
- * phys_addr component of the scif_range structure whose address is returned in
- * pages. The nr_pages component of scif_range is the length of the array. The
- * prot_flags component of scif_range holds the protection flag value passed
- * when the pages were registered.
- *
- * Each physical page whose address is returned by scif_get_pages() remains
- * available and will not be released for reuse until the scif_range structure
- * is returned in a call to scif_put_pages(). The scif_range structure returned
- * by scif_get_pages() must be unmodified.
- *
- * It is an error to call scif_close() on an endpoint on which a scif_range
- * structure of that endpoint has not been returned to scif_put_pages().
- *
- * Return:
- * Upon successful completion, scif_get_pages() returns 0; otherwise the
- * negative of one of the following errors is returned.
- * Errors:
- * ECONNRESET - Connection reset by peer.
- * EINVAL - offset is not a multiple of the page size, or offset is negative, or
- * len is not a multiple of the page size
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid
- * for the registered address space of the peer epd
- */
-int scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
-		   struct scif_range **pages);
-
-/**
- * scif_put_pages() - Remove references from remote registered pages
- * @pages:	pages to be returned
- *
- * scif_put_pages() releases a scif_range structure previously obtained by
- * calling scif_get_pages(). The physical pages represented by pages may
- * be reused when the window which represented those pages is unregistered.
- * Therefore, those pages must not be accessed after calling scif_put_pages().
- *
- * Return:
- * Upon successful completion, scif_put_pages() returns 0; otherwise the
- * negative of one of the following errors is returned.
- * Errors:
- * EINVAL - pages does not point to a valid scif_range structure, or
- * the scif_range structure pointed to by pages was already returned
- * ENODEV - The remote node is lost or existed, but is not currently in the
- * network since it may have crashed
- * ENOTCONN - The endpoint is not connected
- */
-int scif_put_pages(struct scif_range *pages);
-
-/**
- * scif_poll() - Wait for some event on an endpoint
- * @epds:	Array of endpoint descriptors
- * @nepds:	Length of epds
- * @timeout:	Upper limit on time for which scif_poll() will block
- *
- * scif_poll() waits for one of a set of endpoints to become ready to perform
- * an I/O operation.
- *
- * The epds argument specifies the endpoint descriptors to be examined and the
- * events of interest for each endpoint descriptor. epds is a pointer to an
- * array with one member for each open endpoint descriptor of interest.
- *
- * The number of items in the epds array is specified in nepds. The epd field
- * of scif_pollepd is an endpoint descriptor of an open endpoint. The field
- * events is a bitmask specifying the events which the application is
- * interested in. The field revents is an output parameter, filled by the
- * kernel with the events that actually occurred. The bits returned in revents
- * can include any of those specified in events, or one of the values EPOLLERR,
- * EPOLLHUP, or EPOLLNVAL. (These three bits are meaningless in the events
- * field, and will be set in the revents field whenever the corresponding
- * condition is true.)
- *
- * If none of the events requested (and no error) has occurred for any of the
- * endpoint descriptors, then scif_poll() blocks until one of the events occurs.
- *
- * The timeout argument specifies an upper limit on the time for which
- * scif_poll() will block, in milliseconds. Specifying a negative value in
- * timeout means an infinite timeout.
- *
- * The following bits may be set in events and returned in revents.
- * EPOLLIN - Data may be received without blocking. For a connected
- * endpoint, this means that scif_recv() may be called without blocking. For a
- * listening endpoint, this means that scif_accept() may be called without
- * blocking.
- * EPOLLOUT - Data may be sent without blocking. For a connected endpoint, this
- * means that scif_send() may be called without blocking. EPOLLOUT may also be
- * used to block waiting for a non-blocking connect to complete. This bit value
- * has no meaning for a listening endpoint and is ignored if specified.
- *
- * The following bits are only returned in revents, and are ignored if set in
- * events.
- * EPOLLERR - An error occurred on the endpoint
- * EPOLLHUP - The connection to the peer endpoint was disconnected
- * EPOLLNVAL - The specified endpoint descriptor is invalid.
- *
- * Return:
- * Upon successful completion, scif_poll() returns a non-negative value. A
- * positive value indicates the total number of endpoint descriptors that have
- * been selected (that is, endpoint descriptors for which the revents member is
- * non-zero). A value of 0 indicates that the call timed out and no endpoint
- * descriptors have been selected. Otherwise in user mode -1 is returned and
- * errno is set to indicate the error; in kernel mode the negative of one of
- * the following errors is returned.
- *
- * Errors:
- * EINTR - A signal occurred before any requested event
- * EINVAL - The nepds argument is greater than {OPEN_MAX}
- * ENOMEM - There was no space to allocate file descriptor tables
- */
-int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout);
-
-/**
- * scif_client_register() - Register a SCIF client
- * @client:	client to be registered
- *
- * scif_client_register() registers a SCIF client. The probe() method
- * of the client is called when SCIF peer devices come online and the
- * remove() method is called when the peer devices disappear.
- *
- * Return:
- * Upon successful completion, scif_client_register() returns a non-negative
- * value. Otherwise the return value is the same as subsys_interface_register()
- * in the kernel.
- */
-int scif_client_register(struct scif_client *client);
-
-/**
- * scif_client_unregister() - Unregister a SCIF client
- * @client:	client to be unregistered
- *
- * scif_client_unregister() unregisters a SCIF client.
- *
- * Return:
- * None
- */
-void scif_client_unregister(struct scif_client *client);
-
-#endif /* __SCIF_H__ */
-- 
cgit v1.2.3


From a62f68f5ca53ab61cba2f0a410d0add7a6d54a52 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 23 Oct 2020 17:35:46 +0200
Subject: cpufreq: Introduce cpufreq_driver_test_flags()

Add a helper function to test the flags of the cpufreq driver in use
against a given flags mask.

In particular, this will be needed to test the
CPUFREQ_NEED_UPDATE_LIMITS cpufreq driver flag in the schedutil
governor.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 038ed83aab41..1eaa04f1bae6 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -433,6 +433,7 @@ struct cpufreq_driver {
 int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 
+bool cpufreq_driver_test_flags(u16 flags);
 const char *cpufreq_get_current_driver(void);
 void *cpufreq_get_driver_data(void);
 
-- 
cgit v1.2.3


From a4147d855f50a676ebe61833a681f7c71945f343 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 31 Aug 2020 10:18:04 -0500
Subject: dmaengine: ti-cppi5: Replace zero-length array with flexible-array
 member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a regular need in the kernel to provide a way to declare having a
dynamically sized set of trailing elements in a structure. Kernel code should
always use “flexible array members”[1] for these cases. The older style of
one-element or zero-length arrays should no longer be used[2].

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 include/linux/dma/ti-cppi5.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma/ti-cppi5.h b/include/linux/dma/ti-cppi5.h
index 5896441ee604..efa2f0309f00 100644
--- a/include/linux/dma/ti-cppi5.h
+++ b/include/linux/dma/ti-cppi5.h
@@ -47,7 +47,7 @@ struct cppi5_host_desc_t {
 	u32 buf_info1;
 	u32 org_buf_len;
 	u64 org_buf_ptr;
-	u32 epib[0];
+	u32 epib[];
 } __packed;
 
 #define CPPI5_DESC_MIN_ALIGN			(16U)
@@ -139,7 +139,7 @@ struct cppi5_desc_epib_t {
  */
 struct cppi5_monolithic_desc_t {
 	struct cppi5_desc_hdr_t hdr;
-	u32 epib[0];
+	u32 epib[];
 };
 
 #define CPPI5_INFO2_MDESC_DATA_OFFSET_SHIFT	(18U)
-- 
cgit v1.2.3


From 277ffd6c1ec0aa60856a03e18455fcca7d2a1186 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 31 Aug 2020 10:19:18 -0500
Subject: mailbox: zynqmp-ipi-message: Replace zero-length array with
 flexible-array member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a regular need in the kernel to provide a way to declare having a
dynamically sized set of trailing elements in a structure. Kernel code should
always use “flexible array members”[1] for these cases. The older style of
one-element or zero-length arrays should no longer be used[2].

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 include/linux/mailbox/zynqmp-ipi-message.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mailbox/zynqmp-ipi-message.h b/include/linux/mailbox/zynqmp-ipi-message.h
index 9542b41eacfd..35ce84c8ca02 100644
--- a/include/linux/mailbox/zynqmp-ipi-message.h
+++ b/include/linux/mailbox/zynqmp-ipi-message.h
@@ -14,7 +14,7 @@
  */
 struct zynqmp_ipi_message {
 	size_t len;
-	u8 data[0];
+	u8 data[];
 };
 
 #endif /* _LINUX_ZYNQMP_IPI_MESSAGE_H_ */
-- 
cgit v1.2.3


From 883541051567a62add043a9f4ca5a31f2970bffd Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 31 Aug 2020 10:21:14 -0500
Subject: platform/chrome: cros_ec_commands: Replace zero-length array with
 flexible-array member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a regular need in the kernel to provide a way to declare having a
dynamically sized set of trailing elements in a structure. Kernel code should
always use “flexible array members”[1] for these cases. The older style of
one-element or zero-length arrays should no longer be used[2].

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 include/linux/platform_data/cros_ec_commands.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
index 1fcfe9e63cb9..a3a9a878415f 100644
--- a/include/linux/platform_data/cros_ec_commands.h
+++ b/include/linux/platform_data/cros_ec_commands.h
@@ -1419,7 +1419,7 @@ struct ec_response_flash_info_2 {
 	uint16_t num_banks_total;
 	/* Number of banks described in banks array. */
 	uint16_t num_banks_desc;
-	struct ec_flash_bank banks[0];
+	struct ec_flash_bank banks[];
 } __ec_align4;
 
 /*
@@ -2420,12 +2420,12 @@ struct ec_response_motion_sense_fifo_info {
 	/* Total amount of vector lost */
 	uint16_t total_lost;
 	/* Lost events since the last fifo_info, per sensors */
-	uint16_t lost[0];
+	uint16_t lost[];
 } __ec_todo_packed;
 
 struct ec_response_motion_sense_fifo_data {
 	uint32_t number_data;
-	struct ec_response_motion_sensor_data data[0];
+	struct ec_response_motion_sensor_data data[];
 } __ec_todo_packed;
 
 /* List supported activity recognition */
@@ -3093,7 +3093,7 @@ struct ec_response_tmp006_get_calibration_v1 {
 	uint8_t algorithm;
 	uint8_t num_params;
 	uint8_t reserved[2];
-	float val[0];
+	float val[];
 } __ec_align4;
 
 struct ec_params_tmp006_set_calibration_v1 {
@@ -3101,7 +3101,7 @@ struct ec_params_tmp006_set_calibration_v1 {
 	uint8_t algorithm;
 	uint8_t num_params;
 	uint8_t reserved;
-	float val[0];
+	float val[];
 } __ec_align4;
 
 
@@ -5076,7 +5076,7 @@ struct ec_response_pd_log {
 	uint8_t type;       /* event type : see PD_EVENT_xx below */
 	uint8_t size_port;  /* [7:5] port number [4:0] payload size in bytes */
 	uint16_t data;      /* type-defined data payload */
-	uint8_t payload[0]; /* optional additional data payload: 0..16 bytes */
+	uint8_t payload[];  /* optional additional data payload: 0..16 bytes */
 } __ec_align4;
 
 /* The timestamp is the microsecond counter shifted to get about a ms. */
@@ -5789,7 +5789,7 @@ struct ec_response_fp_encryption_status {
 
 struct ec_response_tp_frame_info {
 	uint32_t n_frames;
-	uint32_t frame_sizes[0];
+	uint32_t frame_sizes[];
 } __ec_align4;
 
 /* Create a snapshot of current frame readings */
-- 
cgit v1.2.3


From 120088832042e6dc9866160ff267f8c347bf53e6 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 31 Aug 2020 10:21:55 -0500
Subject: platform/chrome: cros_ec_proto: Replace zero-length array with
 flexible-array member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a regular need in the kernel to provide a way to declare having a
dynamically sized set of trailing elements in a structure. Kernel code should
always use “flexible array members”[1] for these cases. The older style of
one-element or zero-length arrays should no longer be used[2].

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 include/linux/platform_data/cros_ec_proto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index 4a415ae851ef..02599687770c 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -69,7 +69,7 @@ struct cros_ec_command {
 	uint32_t outsize;
 	uint32_t insize;
 	uint32_t result;
-	uint8_t data[0];
+	uint8_t data[];
 };
 
 /**
-- 
cgit v1.2.3


From 5e01fdff04b7f7c3b8d456c11c8a9f978b4ddf65 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 31 Aug 2020 08:25:42 -0500
Subject: fs: Replace zero-length array with flexible-array member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a regular need in the kernel to provide a way to declare having a
dynamically sized set of trailing elements in a structure. Kernel code should
always use “flexible array members”[1] for these cases. The older style of
one-element or zero-length arrays should no longer be used[2].

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0bd126418bb6..21cc971fd960 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3285,7 +3285,7 @@ static inline ino_t parent_ino(struct dentry *dentry)
  */
 struct simple_transaction_argresp {
 	ssize_t size;
-	char data[0];
+	char data[];
 };
 
 #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))
-- 
cgit v1.2.3


From 0d519cbf38eed4f895aed197d4b135fa7f60f7c2 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 23 Oct 2020 15:10:37 +0200
Subject: debugfs: remove return value of debugfs_create_devm_seqfile()

No one checks the return value of debugfs_create_devm_seqfile(), as it's
not needed, so make the return value void, so that no one tries to do so
in the future.

Link: https://lore.kernel.org/r/20201023131037.2500765-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/debugfs.h | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 851dd1f9a8a5..d6c4cc9ecc77 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -144,10 +144,9 @@ void debugfs_create_u32_array(const char *name, umode_t mode,
 			      struct dentry *parent,
 			      struct debugfs_u32_array *array);
 
-struct dentry *debugfs_create_devm_seqfile(struct device *dev, const char *name,
-					   struct dentry *parent,
-					   int (*read_fn)(struct seq_file *s,
-							  void *data));
+void debugfs_create_devm_seqfile(struct device *dev, const char *name,
+				 struct dentry *parent,
+				 int (*read_fn)(struct seq_file *s, void *data));
 
 bool debugfs_initialized(void);
 
@@ -327,13 +326,12 @@ static inline void debugfs_create_u32_array(const char *name, umode_t mode,
 {
 }
 
-static inline struct dentry *debugfs_create_devm_seqfile(struct device *dev,
-							 const char *name,
-							 struct dentry *parent,
-					   int (*read_fn)(struct seq_file *s,
-							  void *data))
+static inline void debugfs_create_devm_seqfile(struct device *dev,
+					       const char *name,
+					       struct dentry *parent,
+					       int (*read_fn)(struct seq_file *s,
+							      void *data))
 {
-	return ERR_PTR(-ENODEV);
 }
 
 static inline ssize_t debugfs_read_file_bool(struct file *file,
-- 
cgit v1.2.3


From 46d6c5ae953cc0be38efd0e469284df7c4328cf8 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 29 Oct 2020 03:56:06 +0100
Subject: netfilter: use actual socket sk rather than skb sk when routing
 harder

If netfilter changes the packet mark when mangling, the packet is
rerouted using the route_me_harder set of functions. Prior to this
commit, there's one big difference between route_me_harder and the
ordinary initial routing functions, described in the comment above
__ip_queue_xmit():

   /* Note: skb->sk can be different from sk, in case of tunnels */
   int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,

That function goes on to correctly make use of sk->sk_bound_dev_if,
rather than skb->sk->sk_bound_dev_if. And indeed the comment is true: a
tunnel will receive a packet in ndo_start_xmit with an initial skb->sk.
It will make some transformations to that packet, and then it will send
the encapsulated packet out of a *new* socket. That new socket will
basically always have a different sk_bound_dev_if (otherwise there'd be
a routing loop). So for the purposes of routing the encapsulated packet,
the routing information as it pertains to the socket should come from
that socket's sk, rather than the packet's original skb->sk. For that
reason __ip_queue_xmit() and related functions all do the right thing.

One might argue that all tunnels should just call skb_orphan(skb) before
transmitting the encapsulated packet into the new socket. But tunnels do
*not* do this -- and this is wisely avoided in skb_scrub_packet() too --
because features like TSQ rely on skb->destructor() being called when
that buffer space is truly available again. Calling skb_orphan(skb) too
early would result in buffers filling up unnecessarily and accounting
info being all wrong. Instead, additional routing must take into account
the new sk, just as __ip_queue_xmit() notes.

So, this commit addresses the problem by fishing the correct sk out of
state->sk -- it's already set properly in the call to nf_hook() in
__ip_local_out(), which receives the sk as part of its normal
functionality. So we make sure to plumb state->sk through the various
route_me_harder functions, and then make correct use of it following the
example of __ip_queue_xmit().

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv4.h |  2 +-
 include/linux/netfilter_ipv6.h | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index 082e2c41b7ff..5b70ca868bb1 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -16,7 +16,7 @@ struct ip_rt_info {
 	u_int32_t mark;
 };
 
-int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
+int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned addr_type);
 
 struct nf_queue_entry;
 
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 9b67394471e1..48314ade1506 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -42,7 +42,7 @@ struct nf_ipv6_ops {
 #if IS_MODULE(CONFIG_IPV6)
 	int (*chk_addr)(struct net *net, const struct in6_addr *addr,
 			const struct net_device *dev, int strict);
-	int (*route_me_harder)(struct net *net, struct sk_buff *skb);
+	int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb);
 	int (*dev_get_saddr)(struct net *net, const struct net_device *dev,
 		       const struct in6_addr *daddr, unsigned int srcprefs,
 		       struct in6_addr *saddr);
@@ -143,9 +143,9 @@ static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk,
 #endif
 }
 
-int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
+int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb);
 
-static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
+static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 #if IS_MODULE(CONFIG_IPV6)
 	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
@@ -153,9 +153,9 @@ static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
 	if (!v6_ops)
 		return -EHOSTUNREACH;
 
-	return v6_ops->route_me_harder(net, skb);
+	return v6_ops->route_me_harder(net, sk, skb);
 #elif IS_BUILTIN(CONFIG_IPV6)
-	return ip6_route_me_harder(net, skb);
+	return ip6_route_me_harder(net, sk, skb);
 #else
 	return -EHOSTUNREACH;
 #endif
-- 
cgit v1.2.3


From c0391b6ab810381df632677a1dcbbbbd63d05b6d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 29 Oct 2020 13:50:03 +0100
Subject: netfilter: nf_tables: missing validation from the abort path

If userspace does not include the trailing end of batch message, then
nfnetlink aborts the transaction. This allows checking that ruleset
updates trigger no errors.

After this patch, invoking this command from the prerouting chain:

 # nft -c add rule x y fib saddr . oif type local

fails since oif is not supported there.

This patch fixes the lack of rule validation from the abort/check path
to catch configuration errors such as the one above.

Fixes: a654de8fdc18 ("netfilter: nf_tables: fix chain dependency validation")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 89016d08f6a2..f6267e2883f2 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -24,6 +24,12 @@ struct nfnl_callback {
 	const u_int16_t attr_count;		/* number of nlattr's */
 };
 
+enum nfnl_abort_action {
+	NFNL_ABORT_NONE		= 0,
+	NFNL_ABORT_AUTOLOAD,
+	NFNL_ABORT_VALIDATE,
+};
+
 struct nfnetlink_subsystem {
 	const char *name;
 	__u8 subsys_id;			/* nfnetlink subsystem ID */
@@ -31,7 +37,8 @@ struct nfnetlink_subsystem {
 	const struct nfnl_callback *cb;	/* callback for individual types */
 	struct module *owner;
 	int (*commit)(struct net *net, struct sk_buff *skb);
-	int (*abort)(struct net *net, struct sk_buff *skb, bool autoload);
+	int (*abort)(struct net *net, struct sk_buff *skb,
+		     enum nfnl_abort_action action);
 	void (*cleanup)(struct net *net);
 	bool (*valid_genid)(struct net *net, u32 genid);
 };
-- 
cgit v1.2.3


From 290562075d4d9e85b7ff4104f9a634ffc3cccb69 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 27 Oct 2020 15:28:40 -0500
Subject: net/mlx5: Replace zero-length array with flexible-array member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a regular need in the kernel to provide a way to declare having a
dynamically sized set of trailing elements in a structure. Kernel code should
always use “flexible array members”[1] for these cases. The older style of
one-element or zero-length arrays should no longer be used[2].

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.9/process/deprecated.html#zero-length-and-one-element-arrays

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 include/linux/mlx5/mlx5_ifc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 651591a2965d..a092346c7b2d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5823,7 +5823,7 @@ struct mlx5_ifc_alloc_modify_header_context_in_bits {
 	u8         reserved_at_68[0x10];
 	u8         num_of_actions[0x8];
 
-	union mlx5_ifc_set_add_copy_action_in_auto_bits actions[0];
+	union mlx5_ifc_set_add_copy_action_in_auto_bits actions[];
 };
 
 struct mlx5_ifc_dealloc_modify_header_context_out_bits {
@@ -9761,7 +9761,7 @@ struct mlx5_ifc_mcda_reg_bits {
 
 	u8         reserved_at_60[0x20];
 
-	u8         data[0][0x20];
+	u8         data[][0x20];
 };
 
 enum {
-- 
cgit v1.2.3


From e0e398e204634db8fb71bd89cf2f6e3e5bd09b51 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 21 Oct 2020 21:12:15 +0200
Subject: PM: runtime: Drop runtime PM references to supplier on link removal

While removing a device link, drop the supplier device's runtime PM
usage counter as many times as needed to drop all of the runtime PM
references to it from the consumer in addition to dropping the
consumer's link count.

Fixes: baa8809f6097 ("PM / runtime: Optimize the use of device links")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: 5.1+ <stable@vger.kernel.org> # 5.1+
Tested-by: Xiang Chen <chenxiang66@hisilicon.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/pm_runtime.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 18b02dcc168e..eadc1fdebce6 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -58,7 +58,7 @@ extern void pm_runtime_clean_up_links(struct device *dev);
 extern void pm_runtime_get_suppliers(struct device *dev);
 extern void pm_runtime_put_suppliers(struct device *dev);
 extern void pm_runtime_new_link(struct device *dev);
-extern void pm_runtime_drop_link(struct device *dev);
+extern void pm_runtime_drop_link(struct device_link *link);
 
 /**
  * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter.
@@ -280,7 +280,7 @@ static inline void pm_runtime_clean_up_links(struct device *dev) {}
 static inline void pm_runtime_get_suppliers(struct device *dev) {}
 static inline void pm_runtime_put_suppliers(struct device *dev) {}
 static inline void pm_runtime_new_link(struct device *dev) {}
-static inline void pm_runtime_drop_link(struct device *dev) {}
+static inline void pm_runtime_drop_link(struct device_link *link) {}
 
 #endif /* !CONFIG_PM */
 
-- 
cgit v1.2.3


From d6e36668598154820177bfd78c1621d8e6c580a2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 21 Oct 2020 21:13:10 +0200
Subject: PM: runtime: Drop pm_runtime_clean_up_links()

After commit d12544fb2aa9 ("PM: runtime: Remove link state checks in
rpm_get/put_supplier()") nothing prevents the consumer device's
runtime PM from acquiring additional references to the supplier
device after pm_runtime_clean_up_links() has run (or even while it
is running), so calling this function from __device_release_driver()
may be pointless (or even harmful).

Moreover, it ignores stateless device links, so the runtime PM
handling of managed and stateless device links is inconsistent
because of it, so better get rid of it entirely.

Fixes: d12544fb2aa9 ("PM: runtime: Remove link state checks in rpm_get/put_supplier()")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: 5.1+ <stable@vger.kernel.org> # 5.1+
Tested-by: Xiang Chen <chenxiang66@hisilicon.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/pm_runtime.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index eadc1fdebce6..4b708f4e8eed 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -54,7 +54,6 @@ extern u64 pm_runtime_autosuspend_expiration(struct device *dev);
 extern void pm_runtime_update_max_time_suspended(struct device *dev,
 						 s64 delta_ns);
 extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
-extern void pm_runtime_clean_up_links(struct device *dev);
 extern void pm_runtime_get_suppliers(struct device *dev);
 extern void pm_runtime_put_suppliers(struct device *dev);
 extern void pm_runtime_new_link(struct device *dev);
@@ -276,7 +275,6 @@ static inline u64 pm_runtime_autosuspend_expiration(
 				struct device *dev) { return 0; }
 static inline void pm_runtime_set_memalloc_noio(struct device *dev,
 						bool enable){}
-static inline void pm_runtime_clean_up_links(struct device *dev) {}
 static inline void pm_runtime_get_suppliers(struct device *dev) {}
 static inline void pm_runtime_put_suppliers(struct device *dev) {}
 static inline void pm_runtime_new_link(struct device *dev) {}
-- 
cgit v1.2.3


From f8f6ae5d077a9bdaf5cbf2ac960a5d1a04b47482 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Sun, 1 Nov 2020 17:08:00 -0800
Subject: mm: always have io_remap_pfn_range() set pgprot_decrypted()

The purpose of io_remap_pfn_range() is to map IO memory, such as a
memory mapped IO exposed through a PCI BAR.  IO devices do not
understand encryption, so this memory must always be decrypted.
Automatically call pgprot_decrypted() as part of the generic
implementation.

This fixes a bug where enabling AMD SME causes subsystems, such as RDMA,
using io_remap_pfn_range() to expose BAR pages to user space to fail.
The CPU will encrypt access to those BAR pages instead of passing
unencrypted IO directly to the device.

Places not mapping IO should use remap_pfn_range().

Fixes: aca20d546214 ("x86/mm: Add support to make use of Secure Memory Encryption")
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: "Dave Young" <dyoung@redhat.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Toshimitsu Kani <toshi.kani@hpe.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/0-v1-025d64bdf6c4+e-amd_sme_fix_jgg@nvidia.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h      | 9 +++++++++
 include/linux/pgtable.h | 4 ----
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ef360fe70aaf..db6ae4d3fb4e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2759,6 +2759,15 @@ static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma,
 	return VM_FAULT_NOPAGE;
 }
 
+#ifndef io_remap_pfn_range
+static inline int io_remap_pfn_range(struct vm_area_struct *vma,
+				     unsigned long addr, unsigned long pfn,
+				     unsigned long size, pgprot_t prot)
+{
+	return remap_pfn_range(vma, addr, pfn, size, pgprot_decrypted(prot));
+}
+#endif
+
 static inline vm_fault_t vmf_error(int err)
 {
 	if (err == -ENOMEM)
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 38c33eabea89..71125a4676c4 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1427,10 +1427,6 @@ typedef unsigned int pgtbl_mod_mask;
 
 #endif /* !__ASSEMBLY__ */
 
-#ifndef io_remap_pfn_range
-#define io_remap_pfn_range remap_pfn_range
-#endif
-
 #ifndef has_transparent_hugepage
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define has_transparent_hugepage() 1
-- 
cgit v1.2.3


From 286228d382ba6320f04fa2e7c6fc8d4d92e428f4 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 18 Dec 2019 09:39:02 +0100
Subject: can: can_create_echo_skb(): fix echo skb generation: always use
 skb_clone()

All user space generated SKBs are owned by a socket (unless injected into the
kernel via AF_PACKET). If a socket is closed, all associated skbs will be
cleaned up.

This leads to a problem when a CAN driver calls can_put_echo_skb() on an
unshared SKB. If the socket is closed prior to the TX complete handler,
can_get_echo_skb() and the subsequent delivering of the echo SKB to all
registered callbacks, an SKB with a refcount of 0 is delivered.

To avoid the problem, in can_create_echo_skb() the original SKB is now always
cloned, regardless of shared SKB or not. If the process exits it can now
safely discard its SKBs, without disturbing the delivery of the echo SKB.

The problem shows up in the j1939 stack, when it clones the incoming skb, which
detects the already 0 refcount.

We can easily reproduce this with the following example:

testj1939 -B -r can0: &
cansend can0 1823ff40#0123

WARNING: CPU: 0 PID: 293 at lib/refcount.c:25 refcount_warn_saturate+0x108/0x174
refcount_t: addition on 0; use-after-free.
Modules linked in: coda_vpu imx_vdoa videobuf2_vmalloc dw_hdmi_ahb_audio vcan
CPU: 0 PID: 293 Comm: cansend Not tainted 5.5.0-rc6-00376-g9e20dcb7040d #1
Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
Backtrace:
[<c010f570>] (dump_backtrace) from [<c010f90c>] (show_stack+0x20/0x24)
[<c010f8ec>] (show_stack) from [<c0c3e1a4>] (dump_stack+0x8c/0xa0)
[<c0c3e118>] (dump_stack) from [<c0127fec>] (__warn+0xe0/0x108)
[<c0127f0c>] (__warn) from [<c01283c8>] (warn_slowpath_fmt+0xa8/0xcc)
[<c0128324>] (warn_slowpath_fmt) from [<c0539c0c>] (refcount_warn_saturate+0x108/0x174)
[<c0539b04>] (refcount_warn_saturate) from [<c0ad2cac>] (j1939_can_recv+0x20c/0x210)
[<c0ad2aa0>] (j1939_can_recv) from [<c0ac9dc8>] (can_rcv_filter+0xb4/0x268)
[<c0ac9d14>] (can_rcv_filter) from [<c0aca2cc>] (can_receive+0xb0/0xe4)
[<c0aca21c>] (can_receive) from [<c0aca348>] (can_rcv+0x48/0x98)
[<c0aca300>] (can_rcv) from [<c09b1fdc>] (__netif_receive_skb_one_core+0x64/0x88)
[<c09b1f78>] (__netif_receive_skb_one_core) from [<c09b2070>] (__netif_receive_skb+0x38/0x94)
[<c09b2038>] (__netif_receive_skb) from [<c09b2130>] (netif_receive_skb_internal+0x64/0xf8)
[<c09b20cc>] (netif_receive_skb_internal) from [<c09b21f8>] (netif_receive_skb+0x34/0x19c)
[<c09b21c4>] (netif_receive_skb) from [<c0791278>] (can_rx_offload_napi_poll+0x58/0xb4)

Fixes: 0ae89beb283a ("can: add destructor for self generated skbs")
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Link: http://lore.kernel.org/r/20200124132656.22156-1-o.rempel@pengutronix.de
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/skb.h | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index 900b9f4e0605..fc61cf4eff1c 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -61,21 +61,17 @@ static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk)
  */
 static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb)
 {
-	if (skb_shared(skb)) {
-		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+	struct sk_buff *nskb;
 
-		if (likely(nskb)) {
-			can_skb_set_owner(nskb, skb->sk);
-			consume_skb(skb);
-			return nskb;
-		} else {
-			kfree_skb(skb);
-			return NULL;
-		}
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (unlikely(!nskb)) {
+		kfree_skb(skb);
+		return NULL;
 	}
 
-	/* we can assume to have an unshared skb with proper owner */
-	return skb;
+	can_skb_set_owner(nskb, skb->sk);
+	consume_skb(skb);
+	return nskb;
 }
 
 #endif /* !_CAN_SKB_H */
-- 
cgit v1.2.3